Newer
Older
from nose.tools import eq_, assert_raises
from numpy import testing
from mhcflurry.downloads import get_path
# To hunt down a weird warning we were seeing in pandas.
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    """Replacement for ``warnings.showwarning`` that also prints a stack trace.

    Writes the current call stack, followed by the standard formatted
    warning text, to the chosen stream so the code path that triggered the
    warning can be located.

    Args:
        message, category, filename, lineno, line: forwarded unchanged to
            ``warnings.formatwarning``.
        file: destination stream. Anything without a ``write`` attribute
            (including the default ``None``) falls back to ``sys.stderr``.
    """
    log = file if hasattr(file, 'write') else sys.stderr
    traceback.print_stack(file=log)
    log.write(warnings.formatwarning(message, category, filename, lineno, line))


# Install the hook: warnings shown from here on go through the function above.
warnings.showwarning = warn_with_traceback
# NOTE(review): the header of the function enclosing these lines is not
# visible in this excerpt; `predictor`, `peptide`, `allele` and
# `expected_range` are presumably its parameters -- confirm against the
# full file.

# Print the predictor's DataFrame output for the single peptide/allele being
# checked, with include_individual_model_predictions=True.
def debug():
print("\n%s" % (
predictor.predict_to_dataframe(
peptides=[peptide],
allele=allele,
include_individual_model_predictions=True)))
# Predict for exactly one peptide; the one-element tuple unpacking raises
# unless exactly one value comes back.
(prediction,) = predictor.predict(allele=allele, peptides=[peptide])
# Inclusive range check. An assert's message expression is evaluated only
# when the assertion fails, so debug() prints its table only on failure.
assert prediction >= expected_range[0], (predictor, prediction, debug())
assert prediction <= expected_range[1], (predictor, prediction, debug())
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# NOTE(review): these statements are the body of a test whose ``def`` line is
# not visible in this excerpt. They train a small HLA-A*01:01 predictor, then
# run predict_and_check on it directly, after a save/load round-trip, and
# again for a predictor that wraps only the first trained model.
allele = "HLA-A*01:01"
df = pandas.read_csv(
    get_path(
        "data_curated", "curated_training_data.csv.bz2"))
# Keep this allele's peptides of length 8 through 15. ``.loc`` is used
# because ``.ix`` was removed in pandas 1.0; with a boolean mask both select
# the same rows.
df = df.loc[
    (df.allele == allele) &
    (df.peptide.str.len() >= 8) &
    (df.peptide.str.len() <= 15)
]
hyperparameters = {
    "max_epochs": 500,
    "patience": 10,
    "early_stopping": True,
    "validation_split": 0.2,
    "random_negative_rate": 0.0,
    "random_negative_constant": 25,
    "use_embedding": False,
    "kmer_size": 15,
    "batch_normalization": False,
    "locally_connected_layers": [
        {
            "filters": 8,
            "activation": "tanh",
            "kernel_size": 3
        },
        {
            "filters": 8,
            "activation": "tanh",
            "kernel_size": 3
        }
    ],
    "activation": "relu",
    "output_activation": "sigmoid",
    "layer_sizes": [
        32
    ],
    "random_negative_affinity_min": 20000.0,
    "random_negative_affinity_max": 50000.0,
    "dense_layer_l1_regularization": 0.001,
    "dropout_probability": 0.0
}
predictor = Class1AffinityPredictor()
predictor.fit_allele_specific_predictors(
    n_models=2,
    architecture_hyperparameters=hyperparameters,
    allele=allele,
    peptides=df.peptide.values,
    affinities=df.measurement_value.values,
)
# The freshly trained two-model predictor must pass the check.
predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor)

# Round-trip the predictor through save/load and check the reloaded copy.
models_dir = tempfile.mkdtemp("_models")
print(models_dir)
predictor.save(models_dir)
predictor2 = Class1AffinityPredictor.load(models_dir)
predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor2)
shutil.rmtree(models_dir)

# Same checks for a predictor built from only the first trained model.
predictor3 = Class1AffinityPredictor(
    allele_to_allele_specific_models={
        allele: [predictor.allele_to_allele_specific_models[allele][0]]
    })
predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor3)
models_dir = tempfile.mkdtemp("_models")
print(models_dir)
predictor3.save(models_dir)
predictor4 = Class1AffinityPredictor.load(models_dir)
predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor4)
# Remove the second temporary directory as well; it used to be left behind.
shutil.rmtree(models_dir)
def test_class1_affinity_predictor_a0205_memorize_training_data():
    """Fit a predictor on a small HLA-A*02:05 dataset and check it reproduces it.

    Trains a two-model ``Class1AffinityPredictor`` with early stopping,
    validation split, L1 regularization and dropout all disabled, then checks
    that predictions for the training peptides match the measured values in
    log space (rtol=0.2, atol=0.2). Also exercises ``predict_to_dataframe``
    and the handling of an unsupported allele and of too-short / too-long
    peptides, both with the default raising behaviour and with
    ``throw=False`` (NaN results).
    """
    # Memorize the dataset.
    hyperparameters = dict(
        activation="tanh",
        layer_sizes=[64],
        max_epochs=500,
        early_stopping=False,
        validation_split=0.0,
        locally_connected_layers=[],
        dense_layer_l1_regularization=0.0,
        dropout_probability=0.0)

    # First test a Class1NeuralNetwork, then a Class1AffinityPredictor.
    # NOTE(review): only a Class1AffinityPredictor is exercised in the code
    # visible here.
    allele = "HLA-A*02:05"

    df = pandas.read_csv(
        get_path(
            "data_curated", "curated_training_data.csv.bz2"))
    # NOTE(review): in this copy of the file the mask inside the first filter
    # had been overwritten by stray line-number text. It is restored here as
    # an allele filter, since `allele` is what the predictor is fitted for
    # below -- confirm against the upstream source.
    # ``.loc`` is used because ``.ix`` was removed in pandas 1.0; with a
    # boolean mask both select the same rows.
    df = df.loc[
        df.allele == allele
    ]
    df = df.loc[
        df.peptide.str.len() == 9
    ]
    df = df.loc[
        df.measurement_type == "quantitative"
    ]
    df = df.loc[
        df.measurement_source == "kim2014"
    ]

    predictor = Class1AffinityPredictor()
    predictor.fit_allele_specific_predictors(
        n_models=2,
        architecture_hyperparameters=hyperparameters,
        allele=allele,
        peptides=df.peptide.values,
        affinities=df.measurement_value.values,
    )
    ic50_pred = predictor.predict(df.peptide.values, allele=allele)
    ic50_true = df.measurement_value.values
    eq_(len(ic50_pred), len(ic50_true))
    # Compare in log space with both relative and absolute tolerance 0.2.
    testing.assert_allclose(
        numpy.log(ic50_pred),
        numpy.log(ic50_true),
        rtol=0.2,
        atol=0.2)

    # The DataFrame variants are only printed, not asserted on.
    ic50_pred_df = predictor.predict_to_dataframe(
        df.peptide.values, allele=allele)
    print(ic50_pred_df)

    ic50_pred_df2 = predictor.predict_to_dataframe(
        df.peptide.values,
        allele=allele,
        include_individual_model_predictions=True)
    print(ic50_pred_df2)

    # Test an unknown allele
    print("Starting unknown allele check")
    eq_(predictor.supported_alleles, [allele])
    # With throw=False an unsupported allele yields all-NaN predictions...
    ic50_pred = predictor.predict(
        df.peptide.values,
        allele="HLA-A*02:01",
        throw=False)
    assert numpy.isnan(ic50_pred).all()

    # ...and without it the same call raises ValueError.
    assert_raises(
        ValueError,
        predictor.predict,
        df.peptide.values,
        allele="HLA-A*02:01")

    # The failed lookups must not have changed the supported alleles.
    eq_(predictor.supported_alleles, [allele])
    assert_raises(
        ValueError,
        predictor.predict,
        ["AAAAA"],  # too short
        allele=allele)
    assert_raises(
        ValueError,
        predictor.predict,
        ["AAAAAAAAAAAAAAAAAAAA"],  # too long
        allele=allele)
    # With throw=False only the unsupported-length peptides come back NaN.
    ic50_pred = predictor.predict(
        ["AAAAA", "AAAAAAAAA", "AAAAAAAAAAAAAAAAAAAA"],
        allele=allele,
        throw=False)
    assert numpy.isnan(ic50_pred[0])
    assert not numpy.isnan(ic50_pred[1])
    assert numpy.isnan(ic50_pred[2])