From d293a5ee7d5545602731265e3a4c3895ca644e4d Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Thu, 18 May 2017 22:20:01 -0400 Subject: [PATCH] add class1 models download --- .../class1_binding_predictor.py | 21 ++++++---- mhcflurry/downloads.yml | 12 ++++-- test/test_class1_binding_predictor_A0205.py | 41 +++++++++++-------- 3 files changed, 44 insertions(+), 30 deletions(-) diff --git a/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py b/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py index 1b569929..04b87d06 100644 --- a/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py +++ b/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py @@ -150,7 +150,7 @@ class Class1BindingPredictor(object): self.hyperparameters['random_negative_rate'] + self.hyperparameters['random_negative_constant']) num_random_negative = pandas.Series(num_random_negative) - logging.info("Random negative counts per length: %s" % ( + logging.info("Random negative counts per length:\n%s" % ( str(num_random_negative))) aa_distribution = None @@ -160,7 +160,7 @@ class Class1BindingPredictor(object): smoothing=self.hyperparameters[ 'random_negative_distribution_smoothing']) logging.info( - "Using amino acid distribution for random negative: %s" % ( + "Using amino acid distribution for random negative:\n%s" % ( str(aa_distribution))) y_values = from_ic50(affinities) @@ -224,17 +224,14 @@ class Class1BindingPredictor(object): "peptide": numpy.concatenate([ random_negative_peptides_encoding, peptide_encoding, - ]) + ]) if len(random_negative_peptides_encoding) > 0 + else peptide_encoding } if pseudosequence_length: # TODO: add random pseudosequences for random negative peptides raise NotImplemented( "Allele pseudosequences unsupported with random negatives") - logging.info("Epoch %3d / %3d. 
Min val loss at epoch %s" % ( - i, - self.hyperparameters['max_epochs'], - min_val_loss_iteration)) fit_history = self.network.fit( x_dict_with_random_negatives, y_dict_with_random_negatives, @@ -248,6 +245,13 @@ class Class1BindingPredictor(object): for (key, value) in fit_history.history.items(): self.fit_history[key].extend(value) + logging.info( + "Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % ( + i, + self.hyperparameters['max_epochs'], + self.fit_history['loss'][-1], + min_val_loss_iteration)) + if self.hyperparameters['validation_split']: val_loss = fit_history.history['val_loss'][-1] val_losses.append(val_loss) @@ -273,7 +277,8 @@ class Class1BindingPredictor(object): pseudosequences_input = self.pseudosequence_to_network_input( allele_pseudosequences) x_dict['pseudosequence'] = pseudosequences_input - return numpy.array(self.network.predict(x_dict)) + (predictions,) = numpy.array(self.network.predict(x_dict)).T + return to_ic50(predictions) @staticmethod def make_network( diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml index 76f420fb..703ca938 100644 --- a/mhcflurry/downloads.yml +++ b/mhcflurry/downloads.yml @@ -20,6 +20,14 @@ releases: 1.0.0: compatibility-version: 2 downloads: + - name: models_class1 + url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/models_class1.tar.bz2 + default: true + + - name: data_curated + url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2 + default: true + - name: data_kim2014 url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2 default: false @@ -28,10 +36,6 @@ releases: url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_iedb.tar.bz2 default: false - - name: data_curated - url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2 - default: true - 0.2.0: compatibility-version: 1 downloads: diff --git 
a/test/test_class1_binding_predictor_A0205.py b/test/test_class1_binding_predictor_A0205.py index fb3df4a7..451d3ca7 100644 --- a/test/test_class1_binding_predictor_A0205.py +++ b/test/test_class1_binding_predictor_A0205.py @@ -1,7 +1,7 @@ -import numpy as np -np.random.seed(0) +import numpy +import pandas +numpy.random.seed(0) -from mhcflurry.affinity_measurement_dataset import AffinityMeasurementDataset from mhcflurry import Class1BindingPredictor from nose.tools import eq_ @@ -11,27 +11,32 @@ from mhcflurry.downloads import get_path def test_class1_binding_predictor_A0205_training_accuracy(): - dataset = AffinityMeasurementDataset.from_csv(get_path( - "data_combined_iedb_kim2014", "combined_human_class1_dataset.csv")) - dataset_a0205_all_lengths = dataset.get_allele("HLA-A0205") - dataset_a0205 = AffinityMeasurementDataset( - dataset_a0205_all_lengths._df.ix[ - dataset_a0205_all_lengths._df.peptide.str.len() == 9]) + df = pandas.read_csv( + get_path( + "data_curated", "curated_training_data.csv.bz2")) + df = df.ix[df.allele == "HLA-A*02:05"] + df = df.ix[ + df.peptide.str.len() == 9 + ] + df = df.ix[ + df.measurement_type == "quantitative" + ] + df = df.ix[ + df.measurement_source == "kim2014" + ] predictor = Class1BindingPredictor( - name="A0205", - embedding_output_dim=32, activation="tanh", layer_sizes=[64], - optimizer="adam", + max_epochs=1000, # Memorize the dataset. + early_stopping=False, dropout_probability=0.0) - predictor.fit_dataset(dataset_a0205, n_training_epochs=1000) - peptides = dataset_a0205.peptides - ic50_pred = predictor.predict(peptides) - ic50_true = dataset_a0205.affinities + predictor.fit(df.peptide.values, df.measurement_value.values) + ic50_pred = predictor.predict(df.peptide.values) + ic50_true = df.measurement_value.values eq_(len(ic50_pred), len(ic50_true)) testing.assert_allclose( - np.log(ic50_pred), - np.log(ic50_true), + numpy.log(ic50_pred), + numpy.log(ic50_true), rtol=0.2, atol=0.2) -- GitLab