Skip to content
Snippets Groups Projects
Commit d1ef4aae authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

add test

parent 0699a570
No related merge requests found
...@@ -145,7 +145,7 @@ def load_data_iedb(iedb_csv, include_qualitative=True, include_mass_spec=False): ...@@ -145,7 +145,7 @@ def load_data_iedb(iedb_csv, include_qualitative=True, include_mass_spec=False):
quantitative["measurement_type"] = "quantitative" quantitative["measurement_type"] = "quantitative"
quantitative["measurement_inequality"] = quantitative[ quantitative["measurement_inequality"] = quantitative[
"Measurement Inequality" "Measurement Inequality"
].fillna("=") ].fillna("=").map(lambda s: {">=": ">", "<=": "<"}.get(s, s))
print("Quantitative measurements: %d" % len(quantitative)) print("Quantitative measurements: %d" % len(quantitative))
qualitative = iedb_df.ix[iedb_df["Units"].isnull()].copy() qualitative = iedb_df.ix[iedb_df["Units"].isnull()].copy()
......
...@@ -41,7 +41,7 @@ releases: ...@@ -41,7 +41,7 @@ releases:
default: false default: false
- name: data_curated - name: data_curated
url: https://github.com/openvax/mhcflurry/releases/download/pan-dev1/data_curated.20190514.tar.bz2 url: https://github.com/openvax/mhcflurry/releases/download/pan-dev1/data_curated.20190516.tar.bz2
default: true default: true
# Older downloads # Older downloads
......
...@@ -246,12 +246,6 @@ def main(args): ...@@ -246,12 +246,6 @@ def main(args):
print("Data inequalities:") print("Data inequalities:")
print(df.measurement_inequality.value_counts()) print(df.measurement_inequality.value_counts())
df.measurement_inequality = df.measurement_inequality.map(lambda s: {
">=": ">", "<=": "<"
}.get(s, s))
print("Data inequalities after adjustment:")
print(df.measurement_inequality.value_counts())
if args.ignore_inequalities and "measurement_inequality" in df.columns: if args.ignore_inequalities and "measurement_inequality" in df.columns:
print("Dropping measurement_inequality column") print("Dropping measurement_inequality column")
......
"""
Tests for training and predicting using Class1 pan-allele models.
"""
import json
import os
import shutil
import tempfile
import subprocess
from copy import deepcopy
import pandas
from numpy.testing import assert_array_less, assert_equal
from mhcflurry import Class1AffinityPredictor,Class1NeuralNetwork
from mhcflurry.allele_encoding import AlleleEncoding
from mhcflurry.downloads import get_path
HYPERPARAMETERS = {
'activation': 'tanh',
'allele_dense_layer_sizes': [],
'batch_normalization': False,
'dense_layer_l1_regularization': 0.0,
'dense_layer_l2_regularization': 0.0,
'dropout_probability': 0.5,
'early_stopping': True,
'init': 'glorot_uniform',
'layer_sizes': [64],
'learning_rate': None,
'locally_connected_layers': [],
'loss': 'custom:mse_with_inequalities',
'max_epochs': 5000,
'minibatch_size': 128,
'optimizer': 'rmsprop',
'output_activation': 'sigmoid',
'patience': 20,
'peptide_allele_merge_activation': '',
'peptide_allele_merge_method': 'concatenate',
'peptide_amino_acid_encoding': 'BLOSUM62',
'peptide_dense_layer_sizes': [],
'peptide_encoding': {
'alignment_method': 'left_pad_centered_right_pad',
'max_length': 15,
'vector_encoding_name': 'BLOSUM62',
},
'random_negative_affinity_max': 50000.0,
'random_negative_affinity_min': 20000.0,
'random_negative_constant': 25,
'random_negative_distribution_smoothing': 0.0,
'random_negative_match_distribution': True,
'random_negative_rate': 0.2,
'train_data': {},
'validation_split': 0.1,
}
ALLELE_TO_SEQUENCE = pandas.read_csv(
get_path(
"allele_sequences", "allele_sequences.csv"),
index_col=0).sequence.to_dict()
TRAIN_DF = pandas.read_csv(
get_path(
"data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
TRAIN_DF = TRAIN_DF.loc[TRAIN_DF.allele.isin(ALLELE_TO_SEQUENCE)]
TRAIN_DF = TRAIN_DF.loc[TRAIN_DF.peptide.str.len() >= 8]
TRAIN_DF = TRAIN_DF.loc[TRAIN_DF.peptide.str.len() <= 15]
def test_train_simple():
network = Class1NeuralNetwork(**HYPERPARAMETERS)
allele_encoding = AlleleEncoding(
TRAIN_DF.allele.values,
allele_to_sequence=ALLELE_TO_SEQUENCE)
network.fit(
TRAIN_DF.peptide.values,
affinities=TRAIN_DF.measurement_value.values,
allele_encoding=allele_encoding,
inequalities=TRAIN_DF.measurement_inequality.values)
predictions = network.predict(
peptides=TRAIN_DF.peptide.values,
allele_encoding=allele_encoding)
print(pandas.Series(predictions).describe())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment