Skip to content
Snippets Groups Projects
Commit 6cac6ebd authored by Tim O'Donnell
Browse files

updates

parent dd062baa
No related merge requests found
......@@ -3,6 +3,7 @@ import time
import hashlib
import json
from os.path import join, exists
from six import string_types
import numpy
import pandas
......@@ -80,7 +81,7 @@ class Class1AffinityPredictor(object):
def weights_path(models_dir, model_name):
    """
    Return the path of the weights file for a model.

    The file name has the form ``weights_<model_name>.<extension>``, where
    the extension is ``Class1NeuralNetwork.weights_filename_extension``.

    Parameters
    ----------
    models_dir : string
        Directory component of the returned path.
    model_name : string
        Name of the model; embedded in the file name.

    Returns
    -------
    string
    """
    filename = "weights_%s.%s" % (
        model_name, Class1NeuralNetwork.weights_filename_extension)
    return join(models_dir, filename)
......@@ -228,66 +229,67 @@ class Class1AffinityPredictor(object):
verbose=verbose)
yield model
def predict(self, peptides, alleles=None, allele=None):
    """
    Return the "prediction" column of predict_to_dataframe as an array.

    All arguments are forwarded unchanged to ``predict_to_dataframe``.

    Parameters
    ----------
    peptides : list or array of string
    alleles : list or array of string, optional
        One allele per peptide.
    allele : string, optional
        A single allele used for every peptide.

    Returns
    -------
    The ``.values`` of the ``prediction`` column of the resulting dataframe.
    """
    return self.predict_to_dataframe(
        peptides=peptides,
        alleles=alleles,
        allele=allele,
    ).prediction.values
def predict_to_dataframe(
        self,
        peptides,
        alleles=None,
        allele=None,
        include_individual_model_predictions=False):
    """
    Predict for each (peptide, allele) pair and return a dataframe.

    Every pan-allele model and every allele-specific model registered for
    the (normalized) allele contributes one ``model_*`` column. The
    ``prediction`` column is the geometric mean across those columns, and
    ``prediction_low`` / ``prediction_high`` are the 5th / 95th percentiles
    across them (computed in log space).

    Parameters
    ----------
    peptides : list or array of string
    alleles : list or array of string, optional
        One allele per peptide. Exactly one of `alleles` and `allele` must
        be given.
    allele : string, optional
        A single allele used for every peptide.
    include_individual_model_predictions : boolean
        If True, keep the per-model ``model_*`` columns in the result.

    Returns
    -------
    pandas.DataFrame with columns peptide, allele, normalized_allele,
    prediction, prediction_low and prediction_high, plus the ``model_*``
    columns when requested.

    Raises
    ------
    TypeError
        If `peptides` or `alleles` is a single string.
    ValueError
        If both or neither of `allele` and `alleles` are specified.
    """
    if isinstance(peptides, string_types):
        raise TypeError("peptides must be a list or array, not a string")
    if isinstance(alleles, string_types):
        raise TypeError("alleles must be a list or array, not a string")
    if allele is not None:
        if alleles is not None:
            raise ValueError("Specify exactly one of allele or alleles")
        alleles = [allele] * len(peptides)
    elif alleles is None:
        # Fail early with a clear message instead of trying to normalize
        # a column of None values below.
        raise ValueError("Specify exactly one of allele or alleles")

    df = pandas.DataFrame({
        'peptide': peptides,
        'allele': alleles,
    })
    df["normalized_allele"] = df.allele.map(
        mhcnames.normalize_allele_name)

    if self.class1_pan_allele_models:
        allele_pseudosequences = df.normalized_allele.map(
            self.allele_to_pseudosequence)
        encodable_peptides = EncodableSequences.create(
            df.peptide.values)
        for (i, model) in enumerate(self.class1_pan_allele_models):
            df["model_pan_%d" % i] = model.predict(
                encodable_peptides,
                allele_pseudosequences=allele_pseudosequences)

    if self.allele_to_allele_specific_models:
        # Named normalized_allele so the `allele` argument is not shadowed.
        for normalized_allele in df.normalized_allele.unique():
            mask = (df.normalized_allele == normalized_allele).values
            allele_peptides = EncodableSequences.create(
                df.loc[mask].peptide.values)
            models = self.allele_to_allele_specific_models.get(
                normalized_allele, [])
            for (i, model) in enumerate(models):
                # Rows for other alleles are left as NaN in this column.
                df.loc[mask, "model_single_%d" % i] = model.predict(
                    allele_peptides)

    model_columns = [c for c in df.columns if c.startswith("model_")]

    # Geometric mean and percentiles are taken across the per-model
    # columns, so they must be computed from the full frame of log
    # predictions rather than from the already-averaged series.
    log_predictions = numpy.log(df[model_columns])
    df["prediction"] = numpy.exp(log_predictions.mean(1))
    df["prediction_low"] = numpy.exp(
        log_predictions.quantile(q=0.05, axis=1))
    df["prediction_high"] = numpy.exp(
        log_predictions.quantile(q=0.95, axis=1))

    if include_individual_model_predictions:
        return df
    return df[
        [c for c in df.columns if c not in model_columns]
    ]
\ No newline at end of file
......@@ -8,7 +8,7 @@ import sklearn
import numpy
import scipy
from ..regression_target import ic50_to_regression_target
from mhcflurry.regression_target import ic50_to_regression_target
def make_scores(
......
......@@ -2,35 +2,39 @@ import numpy
import pandas
numpy.random.seed(0)
from mhcflurry import Class1NeuralNetwork
from mhcflurry import Class1NeuralNetwork, Class1AffinityPredictor
from nose.tools import eq_
from numpy import testing
from mhcflurry.downloads import get_path
allele = "HLA-A*02:05"
def test_class1_binding_predictor_A0205_training_accuracy():
df = pandas.read_csv(
df = pandas.read_csv(
get_path(
"data_curated", "curated_training_data.csv.bz2"))
df = df.ix[df.allele == "HLA-A*02:05"]
df = df.ix[
df.peptide.str.len() == 9
]
df = df.ix[
df.measurement_type == "quantitative"
]
df = df.ix[
df.measurement_source == "kim2014"
]
predictor = Class1NeuralNetwork(
activation="tanh",
layer_sizes=[64],
max_epochs=1000, # Memorize the dataset.
early_stopping=False,
dropout_probability=0.0)
df = df.ix[df.allele == allele]
df = df.ix[
df.peptide.str.len() == 9
]
df = df.ix[
df.measurement_type == "quantitative"
]
df = df.ix[
df.measurement_source == "kim2014"
]
hyperparameters = dict(
activation="tanh",
layer_sizes=[64],
max_epochs=1000, # Memorize the dataset.
early_stopping=False,
dropout_probability=0.0)
def test_class1_neural_network_A0205_training_accuracy():
predictor = Class1NeuralNetwork(**hyperparameters)
predictor.fit(df.peptide.values, df.measurement_value.values)
ic50_pred = predictor.predict(df.peptide.values)
ic50_true = df.measurement_value.values
......@@ -40,3 +44,23 @@ def test_class1_binding_predictor_A0205_training_accuracy():
numpy.log(ic50_true),
rtol=0.2,
atol=0.2)
def test_class1_affinity_predictor_A0205_training_accuracy():
    """
    Class1AffinityPredictor with a single allele-specific model should
    memorize its training data.

    Named after Class1AffinityPredictor so that it does not redefine (and
    thereby hide) the Class1NeuralNetwork test of the same dataset.
    """
    predictor = Class1AffinityPredictor()
    predictor.fit_allele_specific_predictors(
        n_models=1,
        architecture_hyperparameters=hyperparameters,
        allele=allele,
        peptides=df.peptide.values,
        affinities=df.measurement_value.values,
    )
    ic50_pred = predictor.predict(df.peptide.values, allele=allele)
    ic50_true = df.measurement_value.values
    eq_(len(ic50_pred), len(ic50_true))
    # Compare in log space: IC50 values span several orders of magnitude.
    testing.assert_allclose(
        numpy.log(ic50_pred),
        numpy.log(ic50_true),
        rtol=0.2,
        atol=0.2)
import tempfile
import shutil
import os
import time
import cProfile
import json
from os.path import join
import os
import shutil
import tempfile
import time
from os import mkdir
from os.path import join
from numpy.testing import assert_allclose, assert_equal
import numpy
from nose.tools import eq_
from . import make_random_peptides
from mhcflurry.class1_affinity_prediction import scoring
from mhcflurry.measurement_collection import MeasurementCollection
from mhcflurry.class1_allele_specific_ensemble import train_command
from mhcflurry import scoring
from mhcflurry.affinity_measurement_dataset import AffinityMeasurementDataset
from mhcflurry.downloads import get_path
from mhcflurry.amino_acid import common_amino_acid_letters
from mhcflurry.class1_allele_specific_ensemble import train_command
from mhcflurry \
.class1_allele_specific_ensemble \
.class1_ensemble_multi_allele_predictor import (
Class1EnsembleMultiAllelePredictor,
get_downloaded_predictor,
HYPERPARAMETER_DEFAULTS)
Class1EnsembleMultiAllelePredictor,
get_downloaded_predictor,
HYPERPARAMETER_DEFAULTS)
from mhcflurry.downloads import get_path
from mhcflurry.measurement_collection import MeasurementCollection
from nose.tools import eq_
from numpy.testing import assert_allclose, assert_equal
from . import make_random_peptides
def test_single_allele():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment