Skip to content
Snippets Groups Projects
Commit d293a5ee authored by Tim O'Donnell
Browse files

Add class1 models download

parent 32ce51d3
No related merge requests found
......@@ -150,7 +150,7 @@ class Class1BindingPredictor(object):
self.hyperparameters['random_negative_rate'] +
self.hyperparameters['random_negative_constant'])
num_random_negative = pandas.Series(num_random_negative)
logging.info("Random negative counts per length: %s" % (
logging.info("Random negative counts per length:\n%s" % (
str(num_random_negative)))
aa_distribution = None
......@@ -160,7 +160,7 @@ class Class1BindingPredictor(object):
smoothing=self.hyperparameters[
'random_negative_distribution_smoothing'])
logging.info(
"Using amino acid distribution for random negative: %s" % (
"Using amino acid distribution for random negative:\n%s" % (
str(aa_distribution)))
y_values = from_ic50(affinities)
......@@ -224,17 +224,14 @@ class Class1BindingPredictor(object):
"peptide": numpy.concatenate([
random_negative_peptides_encoding,
peptide_encoding,
])
]) if len(random_negative_peptides_encoding) > 0
else peptide_encoding
}
if pseudosequence_length:
# TODO: add random pseudosequences for random negative peptides
raise NotImplemented(
"Allele pseudosequences unsupported with random negatives")
logging.info("Epoch %3d / %3d. Min val loss at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
min_val_loss_iteration))
fit_history = self.network.fit(
x_dict_with_random_negatives,
y_dict_with_random_negatives,
......@@ -248,6 +245,13 @@ class Class1BindingPredictor(object):
for (key, value) in fit_history.history.items():
self.fit_history[key].extend(value)
logging.info(
"Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
self.fit_history['loss'][-1],
min_val_loss_iteration))
if self.hyperparameters['validation_split']:
val_loss = fit_history.history['val_loss'][-1]
val_losses.append(val_loss)
......@@ -273,7 +277,8 @@ class Class1BindingPredictor(object):
pseudosequences_input = self.pseudosequence_to_network_input(
allele_pseudosequences)
x_dict['pseudosequence'] = pseudosequences_input
return numpy.array(self.network.predict(x_dict))
(predictions,) = numpy.array(self.network.predict(x_dict)).T
return to_ic50(predictions)
@staticmethod
def make_network(
......
......@@ -20,6 +20,14 @@ releases:
1.0.0:
compatibility-version: 2
downloads:
- name: models_class1
url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/models_class1.tar.bz2
default: true
- name: data_curated
url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2
default: true
- name: data_kim2014
url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2
default: false
......@@ -28,10 +36,6 @@ releases:
url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_iedb.tar.bz2
default: false
- name: data_curated
url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2
default: true
0.2.0:
compatibility-version: 1
downloads:
......
import numpy as np
np.random.seed(0)
import numpy
import pandas
numpy.random.seed(0)
from mhcflurry.affinity_measurement_dataset import AffinityMeasurementDataset
from mhcflurry import Class1BindingPredictor
from nose.tools import eq_
......@@ -11,27 +11,32 @@ from mhcflurry.downloads import get_path
def test_class1_binding_predictor_A0205_training_accuracy():
dataset = AffinityMeasurementDataset.from_csv(get_path(
"data_combined_iedb_kim2014", "combined_human_class1_dataset.csv"))
dataset_a0205_all_lengths = dataset.get_allele("HLA-A0205")
dataset_a0205 = AffinityMeasurementDataset(
dataset_a0205_all_lengths._df.ix[
dataset_a0205_all_lengths._df.peptide.str.len() == 9])
df = pandas.read_csv(
get_path(
"data_curated", "curated_training_data.csv.bz2"))
df = df.ix[df.allele == "HLA-A*02:05"]
df = df.ix[
df.peptide.str.len() == 9
]
df = df.ix[
df.measurement_type == "quantitative"
]
df = df.ix[
df.measurement_source == "kim2014"
]
predictor = Class1BindingPredictor(
name="A0205",
embedding_output_dim=32,
activation="tanh",
layer_sizes=[64],
optimizer="adam",
max_epochs=1000, # Memorize the dataset.
early_stopping=False,
dropout_probability=0.0)
predictor.fit_dataset(dataset_a0205, n_training_epochs=1000)
peptides = dataset_a0205.peptides
ic50_pred = predictor.predict(peptides)
ic50_true = dataset_a0205.affinities
predictor.fit(df.peptide.values, df.measurement_value.values)
ic50_pred = predictor.predict(df.peptide.values)
ic50_true = df.measurement_value.values
eq_(len(ic50_pred), len(ic50_true))
testing.assert_allclose(
np.log(ic50_pred),
np.log(ic50_true),
numpy.log(ic50_pred),
numpy.log(ic50_true),
rtol=0.2,
atol=0.2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment