diff --git a/README.md b/README.md index b9dfadcc124ddd4734826551dcf2c23364031802..9803c3f0d2e9e514160ac3fcf61470f5555b3a86 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[](https://travis-ci.org/hammerlab/mhcflurry) [](https://coveralls.io/github/hammerlab/mhcflurry?branch=fix-training-script) +[](https://travis-ci.org/hammerlab/mhcflurry) [](https://coveralls.io/github/hammerlab/mhcflurry?branch=master) # mhcflurry Peptide-MHC binding affinity prediction diff --git a/mhcflurry/common.py b/mhcflurry/common.py index 6ab2f9fd64aa394bd0c2a95adbdcdbeb0d8b08b4..a3318c2f900bfa29f5d84ae2da8d55326d828a33 100644 --- a/mhcflurry/common.py +++ b/mhcflurry/common.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015. Mount Sinai School of Medicine +# Copyright (c) 2016. Mount Sinai School of Medicine # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ from __future__ import ( division, absolute_import, ) - +import numpy as np def parse_int_list(s): return [int(part.strip() for part in s.split(","))] @@ -65,3 +65,37 @@ def split_allele_names(s): for part in s.split(",") ] + +def ic50_to_regression_target(ic50, max_ic50): + """ + Transform IC50 inhibitory binding concentrations to affinity values between + [0,1] where 0 means a value greater or equal to max_ic50 and 1 means very + strong binder. + + Parameters + ---------- + ic50 : numpy.ndarray + + max_ic50 : float + """ + log_ic50 = np.log(ic50) / np.log(max_ic50) + regression_target = 1.0 - log_ic50 + # clamp to values between 0, 1 + regression_target = np.maximum(regression_target, 0.0) + regression_target = np.minimum(regression_target, 1.0) + return regression_target + +def regression_target_to_ic50(y, max_ic50): + """ + Transform values between [0,1] to IC50 inhibitory binding concentrations + between [1.0, infinity] + + Parameters + ---------- + y : numpy.ndarray of float + + max_ic50 : float + + Returns numpy.ndarray + """ + return max_ic50 ** (1.0 - y) diff --git a/mhcflurry/data.py b/mhcflurry/data.py index d4e9b83ee1c215702bbeed9c24c1a280390a763b..054c1d2e5992040ace4120cf7d1f4ff7efa4dfa6 100644 --- a/mhcflurry/data.py +++ b/mhcflurry/data.py @@ -22,7 +22,7 @@ from collections import namedtuple, defaultdict import pandas as pd import numpy as np -from .common import normalize_allele_name +from .common import normalize_allele_name, ic50_to_regression_target from .amino_acid import common_amino_acids from .peptide_encoding import ( indices_to_hotshot_encoding, @@ -104,8 +104,7 @@ def load_dataframe( only_human : bool Only load entries from human MHC alleles - Returns DataFrame augmented with extra columns: - - "log_ic50" : log(ic50) / log(max_ic50) + Returns DataFrame augmented with extra column: - "regression_output" : 1.0 - log(ic50)/log(max_ic50), limited to [0,1] """ if sep is None: @@ -138,13 +137,7 @@ def load_dataframe( df[allele_column_name] = df[allele_column_name].map(normalize_allele_name) ic50 = np.array(df[ic50_column_name]) - log_ic50 = np.log(ic50) / np.log(max_ic50) - df["log_ic50"] = log_ic50 - regression_output = 1.0 - log_ic50 - # clamp to values between 0, 1 - regression_output = np.maximum(regression_output, 0.0) - regression_output = np.minimum(regression_output, 1.0) - df["regression_output"] = regression_output + df["regression_output"] = ic50_to_regression_target(ic50, max_ic50=max_ic50) return df, peptide_column_name @@ -278,6 +271,57 @@ def encode_peptide_to_affinity_dict( n_samples, len(Y)) return (kmer_peptides, original_peptides, counts, X_index, X_binary, Y) +def create_allele_data_from_peptide_to_ic50_dict( + peptide_to_ic50_dict, + max_ic50=MAX_IC50, + kmer_length=9, + flatten_binary_encoding=True): + """ + Parameters + ---------- + peptide_to_ic50_dict : dict + Dictionary mapping peptides of different lengths to IC50 binding + affinity values. + + max_ic50 : float + Maximum IC50 value used as the cutoff for affinity of 0.0 when + transforming from IC50 to regression targets. + + kmer_length : int + What length substrings will be fed to a fixed-length predictor? + + flatten_binary_encoding : bool + Should hotshot encodings of amino acid inputs be flattened into a 1D + vector or have two dimensions (where the first represents position)? + + Return an AlleleData object. + """ + Y_dict = { + peptide: ic50_to_regression_target(ic50, max_ic50) + for (peptide, ic50) + in peptide_to_ic50_dict.items() + } + (kmer_peptides, original_peptides, counts, X_index, X_binary, Y_kmer) = \ + encode_peptide_to_affinity_dict( + Y_dict, + peptide_length=kmer_length, + flatten_binary_encoding=flatten_binary_encoding) + + ic50_array = np.array([peptide_to_ic50_dict[p] for p in original_peptides]) + assert len(kmer_peptides) == len(ic50_array), \ + "Mismatch between # of peptides %d and # IC50 outputs %d" % ( + len(kmer_peptides), len(ic50_array)) + + return AlleleData( + X_index=X_index, + X_binary=X_binary, + Y=Y_kmer, + ic50=ic50_array, + peptides=kmer_peptides, + original_peptides=original_peptides, + original_lengths=[len(peptide) for peptide in original_peptides], + substring_counts=counts, + weights=1.0 / counts) def load_allele_datasets( filename, @@ -371,35 +415,9 @@ def load_allele_datasets( for (peptide, ic50) in zip(raw_peptides, group[ic50_column_name]) } - - Y_dict = { - peptide: y - for (peptide, y) - in zip(raw_peptides, group["regression_output"]) - } - - (kmer_peptides, original_peptides, counts, X_index, X_binary, Y) = \ - encode_peptide_to_affinity_dict( - Y_dict, - peptide_length=peptide_length, - flatten_binary_encoding=flatten_binary_encoding) - - ic50 = np.array([ic50_dict[p] for p in original_peptides]) - - assert len(kmer_peptides) == len(ic50), \ - "Mismatch between # of peptides %d and # IC50 outputs %d" % ( - len(kmer_peptides), len(ic50)) - - allele_groups[allele] = AlleleData( - X_index=X_index, - X_binary=X_binary, - Y=Y, - ic50=ic50, - peptides=kmer_peptides, - original_peptides=original_peptides, - original_lengths=[len(peptide) for peptide in original_peptides], - substring_counts=counts, - weights=1.0 / counts) + allele_groups[allele] = create_allele_data_from_peptide_to_ic50_dict( + ic50_dict, + max_ic50=max_ic50) return allele_groups diff --git a/mhcflurry/feedforward.py b/mhcflurry/feedforward.py index f2c7876bc520d6f6513b4922335293fba07aabfe..e401b4b823eda2898fdcb7e51dbda5251501b4af 100644 --- a/mhcflurry/feedforward.py +++ b/mhcflurry/feedforward.py @@ -27,28 +27,6 @@ import theano theano.config.exception_verbosity = 'high' -def compile_forward_predictor(model, theano_mode=None): - """ - In cases where we want to get predictions from a model that hasn't - been compiled (to avoid overhead of compiling training code), - use this helper to only compile the subset of Theano needed for - forward-propagation/predictions. - """ - - model.X_test = model.get_input(train=False) - model.y_test = model.get_output(train=False) - if type(model.X_test) == list: - predict_ins = model.X_test - else: - predict_ins = [model.X_test] - - model._predict = theano.function( - predict_ins, - model.y_test, - allow_input_downcast=True, - mode=theano_mode) - - def make_network( input_size, embedding_input_dim=None, @@ -61,8 +39,7 @@ def make_network( dropout_probability=0.0, model=None, optimizer=None, - learning_rate=0.001, - compile_for_training=True): + learning_rate=0.001): if model is None: model = Sequential() @@ -112,16 +89,13 @@ def make_network( output_dim=1, init=init)) model.add(Activation(output_activation)) - if compile_for_training: - model.compile(loss=loss, optimizer=optimizer) - else: - compile_forward_predictor(model) + model.compile(loss=loss, optimizer=optimizer) return model def make_hotshot_network( peptide_length=9, - layer_sizes=[500], + layer_sizes=[100], activation="relu", init="lecun_uniform", loss="mse", @@ -146,7 +120,7 @@ def make_embedding_network( peptide_length=9, embedding_input_dim=20, embedding_output_dim=20, - layer_sizes=[500], + layer_sizes=[100], activation="relu", init="lecun_uniform", loss="mse", diff --git a/mhcflurry/imputation.py b/mhcflurry/imputation.py new file mode 100644 index 0000000000000000000000000000000000000000..145c6e9abce288fefca0a2a8539fbef08ee530f8 --- /dev/null +++ b/mhcflurry/imputation.py @@ -0,0 +1,192 @@ +# Copyright (c) 2015. Mount Sinai School of Medicine +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import ( + print_function, + division, + absolute_import, +) +from collections import defaultdict +import numpy as np +from fancyimpute.dictionary_helpers import dense_matrix_from_nested_dictionary + +from .data import ( + create_allele_data_from_peptide_to_ic50_dict, +) + +def prune_dense_matrix_and_labels( + X, + peptide_list, + allele_list, + min_observations_per_peptide=1, + min_observations_per_allele=1): + """ + Filter the dense matrix of pMHC binding affinities according to + the given minimum number of row/column observations. + + Parameters + ---------- + X : numpy.ndarray + Incomplete dense matrix of pMHC affinity with n_peptides rows and + n_alleles columns. + + peptide_list : list of str + Expected to have n_peptides entries + + allele_list : list of str + Expected to have n_alleles entries + + min_observations_per_peptide : int + Drop peptide rows with fewer than this number of observed values. + + min_observations_per_allele : int + Drop allele columns with fewer than this number of observed values. + """ + observed_mask = np.isfinite(X) + n_observed_per_peptide = observed_mask.sum(axis=1) + too_few_peptide_observations = ( + n_observed_per_peptide < min_observations_per_peptide) + if too_few_peptide_observations.any(): + drop_peptide_indices = np.where(too_few_peptide_observations)[0] + keep_peptide_indices = np.where(~too_few_peptide_observations)[0] + print("Dropping %d peptides with <%d observations" % ( + len(drop_peptide_indices), + min_observations_per_peptide)) + X = X[keep_peptide_indices] + observed_mask = observed_mask[keep_peptide_indices] + peptide_list = [peptide_list[i] for i in keep_peptide_indices] + + n_observed_per_allele = observed_mask.sum(axis=0) + too_few_allele_observations = ( + n_observed_per_allele < min_observations_per_peptide) + if too_few_peptide_observations.any(): + drop_allele_indices = np.where(too_few_allele_observations)[0] + keep_allele_indices = np.where(~too_few_allele_observations)[0] + print("Dropping %d alleles with <%d observations: %s" % ( + len(drop_allele_indices), + min_observations_per_allele, + [allele_list[i] for i in drop_allele_indices])) + X = X[:, keep_allele_indices] + observed_mask = observed_mask[:, keep_allele_indices] + allele_list = [allele_list[i] for i in keep_allele_indices] + return X, peptide_list, allele_list + + +def create_incomplete_dense_pMHC_matrix( + allele_data_dict, + min_observations_per_peptide=1, + min_observations_per_allele=1): + """ + Given a dictionary mapping each allele name onto an AlleleData object, + returns a tuple with a dense matrix of affinities, a list of peptide labels + for each row and a list of allele labels for each column. + + Parameters + ---------- + allele_data_dict : dict + Dictionary mapping allele names to AlleleData objects + + imputer : object + Expected to have a method imputer.complete(X) which takes an array + with missing entries marked by NaN and returns a completed array. + + min_observations_per_peptide : int + Drop peptide rows with fewer than this number of observed values. + + min_observations_per_allele : int + Drop allele columns with fewer than this number of observed values. + """ + peptide_to_allele_to_affinity_dict = defaultdict(dict) + for (allele_name, allele_data) in allele_data_dict.items(): + for peptide, affinity in zip( + allele_data.original_peptides, + allele_data.Y): + if allele_name not in peptide_to_allele_to_affinity_dict[peptide]: + peptide_to_allele_to_affinity_dict[peptide][allele_name] = affinity + + n_binding_values = sum( + len(allele_dict) + for allele_dict in + allele_data_dict.values() + ) + print("Collected %d binding values for %d alleles" % ( + n_binding_values, + len(peptide_to_allele_to_affinity_dict))) + + X, peptide_list, allele_list = \ + dense_matrix_from_nested_dictionary(peptide_to_allele_to_affinity_dict) + return prune_dense_matrix_and_labels( + X, + peptide_list, + allele_list, + min_observations_per_peptide=min_observations_per_peptide, + min_observations_per_allele=min_observations_per_allele) + +def dense_pMHC_matrix_to_nested_dict(X, peptide_list, allele_list): + """ + Converts a dense matrix of (n_peptides, n_alleles) floats to a nested + dictionary from allele -> peptide -> affinity. + """ + allele_to_peptide_to_ic50_dict = defaultdict(dict) + for row_index, peptide in enumerate(peptide_list): + for column_index, allele_name in enumerate(allele_list): + affinity = X[row_index, column_index] + if np.isfinite(affinity): + allele_to_peptide_to_ic50_dict[allele_name][peptide] = affinity + return allele_to_peptide_to_ic50_dict + +def create_imputed_datasets( + allele_data_dict, + imputer, + min_observations_per_peptide=1, + min_observations_per_allele=1): + """ + Parameters + ---------- + allele_data_dict : dict + Dictionary mapping allele names to AlleleData objects + + imputer : object + Expected to have a method imputer.complete(X) which takes an array + with missing entries marked by NaN and returns a completed array. + + min_observations_per_peptide : int + Drop peptide rows with fewer than this number of observed values. + + min_observations_per_allele : int + Drop allele columns with fewer than this number of observed values. + + Returns dictionary mapping allele names to AlleleData objects containing + imputed affinity values. + """ + X_incomplete, peptide_list, allele_list = create_incomplete_dense_pMHC_matrix( + allele_data_dict=allele_data_dict, + min_observations_per_peptide=min_observations_per_peptide, + min_observations_per_allele=min_observations_per_allele) + if np.isnan(X_incomplete).sum() == 0: + # if all entries in the matrix are already filled in then don't + # try using an imputation algorithm since it might raise an + # exception. + X_complete = X_incomplete + else: + X_complete = imputer.complete(X_incomplete) + allele_to_peptide_to_affinity_dict = dense_pMHC_matrix_to_nested_dict( + X=X_complete, + peptide_list=peptide_list, + allele_list=allele_list) + return { + allele_name: create_allele_data_from_peptide_to_ic50_dict(allele_dict) + for (allele_name, allele_dict) + in allele_to_peptide_to_affinity_dict.items() + } diff --git a/mhcflurry/package_metadata.py b/mhcflurry/package_metadata.py index e62b38a755720fa3b92a923aaa4f29810c2feb63..373d4d3d28d8975d1ea8aee614ccb9c1027eb0d4 100644 --- a/mhcflurry/package_metadata.py +++ b/mhcflurry/package_metadata.py @@ -1,2 +1,2 @@ -__version__ = "0.0.2" +__version__ = "0.0.3" diff --git a/mhcflurry/predict.py b/mhcflurry/predict.py index 1955b89202396e1e9f1814c846f31c24c75b425f..cbfb3a0600493b3fb17f0f07b04bd84036f120cc 100644 --- a/mhcflurry/predict.py +++ b/mhcflurry/predict.py @@ -17,11 +17,19 @@ from collections import OrderedDict import pandas as pd from .class1_binding_predictor import Class1BindingPredictor +from .common import normalize_allele_name def predict(alleles, peptides): - allele_results = OrderedDict([]) + result_dict = OrderedDict([ + ("allele", []), + ("peptide", []), + ("ic50", []), + ]) for allele in alleles: + allele = normalize_allele_name(allele) model = Class1BindingPredictor.from_allele_name(allele) - result_dictionary = model.predict_peptides(peptides) - allele_results.append(result_dictionary) - return pd.concat(allele_results) + for i, ic50 in enumerate(model.predict_peptides_ic50(peptides)): + result_dict["allele"].append(allele) + result_dict["peptide"].append(peptides[i]) + result_dict["ic50"].append(ic50) + return pd.DataFrame(result_dict) diff --git a/mhcflurry/predictor_base.py b/mhcflurry/predictor_base.py index a82192fed8f1400532e287c7e937a092b0b7a385..dba570da307c3b594f566a6ce9cfaf9904157418 100644 --- a/mhcflurry/predictor_base.py +++ b/mhcflurry/predictor_base.py @@ -22,6 +22,7 @@ from .amino_acid import ( amino_acids_with_unknown, common_amino_acids ) +from .common import regression_target_to_ic50 class PredictorBase(object): @@ -40,13 +41,6 @@ class PredictorBase(object): self.allow_unknown_amino_acids = allow_unknown_amino_acids self.verbose = verbose - def log_to_ic50(self, log_value): - """ - Convert neural network output to IC50 values between 0.0 and - self.max_ic50 (typically 5000, 20000 or 50000) - """ - return self.max_ic50 ** (1.0 - log_value) - def encode_9mer_peptides(self, peptides): if self.allow_unknown_amino_acids: return amino_acids_with_unknown.index_encoding(peptides, 9) @@ -90,14 +84,15 @@ class PredictorBase(object): return self.predict(X) def predict_9mer_peptides_ic50(self, peptides): - return self.log_to_ic50(self.predict_9mer_peptides(peptides)) + scores = self.predict_9mer_peptides(peptides) + return regression_target_to_ic50(scores, max_ic50=self.max_ic50) def predict_peptides_ic50(self, peptides): """ Predict IC50 affinities for peptides of any length """ - return self.log_to_ic50( - self.predict_peptides(peptides)) + scores = self.predict_peptides(peptides) + return regression_target_to_ic50(scores, max_ic50=self.max_ic50) def predict(self, X): raise ValueError("Method 'predict' not yet implemented for %s!" % ( diff --git a/requirements.txt b/requirements.txt index f1babbeae2cfeb74c412b1c5741d4bce989a45fa..57f834659f2a69130b3c513e09cfc6c48bfde186 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ h5py cherrypy bottle fancyimpute -scikit-learn +scikit-learn \ No newline at end of file diff --git a/test/test_allele_data.py b/test/test_allele_data.py new file mode 100644 index 0000000000000000000000000000000000000000..ce7e08a302f5efb9ff3b010a9df426ccb212a747 --- /dev/null +++ b/test/test_allele_data.py @@ -0,0 +1,21 @@ +from nose.tools import eq_ +from mhcflurry.data import ( + create_allele_data_from_peptide_to_ic50_dict, + AlleleData +) + +def test_create_allele_data_from_peptide_to_ic50_dict(): + peptide_to_ic50_dict = { + ("A" * 10): 1.2, + ("C" * 9): 1000, + } + allele_data = create_allele_data_from_peptide_to_ic50_dict( + peptide_to_ic50_dict, + max_ic50=50000.0) + assert isinstance(allele_data, AlleleData) + expected_peptides = set([ + "A" * 9, + "C" * 9, + ]) + peptides = set(allele_data.peptides) + eq_(expected_peptides, peptides) diff --git a/test/test_common.py b/test/test_common.py new file mode 100644 index 0000000000000000000000000000000000000000..fb5cd8d9c5eb684157c69bffb771b1fbab8bd7bf --- /dev/null +++ b/test/test_common.py @@ -0,0 +1,13 @@ +from mhcflurry.common import ( + ic50_to_regression_target, + regression_target_to_ic50, +) +from nose.tools import eq_ + +def test_regression_target_to_ic50(): + eq_(regression_target_to_ic50(0, max_ic50=500.0), 500) + eq_(regression_target_to_ic50(1, max_ic50=500.0), 1.0) + +def test_ic50_to_regression_target(): + eq_(ic50_to_regression_target(5000, max_ic50=5000.0), 0) + eq_(ic50_to_regression_target(0, max_ic50=5000.0), 1.0) diff --git a/test/test_imputation.py b/test/test_imputation.py new file mode 100644 index 0000000000000000000000000000000000000000..fcc2ccb7d6dc90e65df42d8607130d2a7f87ffa7 --- /dev/null +++ b/test/test_imputation.py @@ -0,0 +1,30 @@ +from mhcflurry.imputation import ( + create_imputed_datasets, +) +from mhcflurry.data import create_allele_data_from_peptide_to_ic50_dict + +from fancyimpute import MICE +from nose.tools import eq_ + +def test_create_imputed_datasets_empty(): + result = create_imputed_datasets({}, imputer=MICE(n_imputations=25)) + eq_(result, {}) + +def test_create_imputed_datasets_two_alleles(): + allele_data_dict = { + "HLA-A*02:01": create_allele_data_from_peptide_to_ic50_dict({ + "A" * 9: 20.0, + "C" * 9: 40000.0, + }), + "HLA-A*02:05": create_allele_data_from_peptide_to_ic50_dict({ + "S" * 9: 500.0, + "A" * 9: 25.0, + }), + } + result = create_imputed_datasets(allele_data_dict, imputer=MICE(n_imputations=25)) + eq_(set(result.keys()), {"HLA-A*02:01", "HLA-A*02:05"}) + expected_peptides = {"A" * 9, "C" * 9, "S" * 9} + for allele_name, allele_data in result.items(): + print(allele_name) + print(allele_data) + eq_(set(allele_data.peptides), expected_peptides) diff --git a/test/test_neural_nets.py b/test/test_neural_nets.py new file mode 100644 index 0000000000000000000000000000000000000000..99f8f77470e879da3f9830e8cb16d581870d5bd4 --- /dev/null +++ b/test/test_neural_nets.py @@ -0,0 +1,64 @@ +from mhcflurry.feedforward import ( + make_embedding_network, + make_hotshot_network, +) +import numpy as np + +def test_make_embedding_network(): + nn = make_embedding_network( + peptide_length=3, + layer_sizes=[3], + activation="tanh", + embedding_input_dim=3, + embedding_output_dim=20, + learning_rate=0.1) + + X_negative = np.array([ + [0] * 3, + [1] * 3, + [1, 0, 0], + [1, 1, 0], + [0, 1, 0], + [0, 0, 1], + [1, 0, 1], + ]) + X_positive = np.array([ + [0, 2, 0], + [1, 2, 0], + [1, 2, 1], + [0, 2, 1], + [2, 2, 0], + [2, 2, 1], + [2, 2, 2], + ]) + X_index = np.vstack([X_negative, X_positive]) + Y = np.array([0.0] * len(X_negative) + [1.0] * len(X_positive)) + nn.fit(X_index, Y, nb_epoch=10) + Y_pred = nn.predict(X_index) + print(Y) + print(Y_pred) + for (Y_i, Y_pred_i) in zip(Y, Y_pred): + assert abs(Y_i - Y_pred_i) <= 0.25, (Y_i, Y_pred_i) + +def test_make_hotshot_network(): + nn = make_hotshot_network( + peptide_length=3, + activation="relu", + layer_sizes=[4], + n_amino_acids=2, + learning_rate=0.1) + X_binary = np.array([ + [True, False, True, False, True, False], + [True, False, True, False, False, True], + [True, False, False, True, True, False], + [True, False, False, True, False, True], + [False, True, True, False, True, False], + [False, True, True, False, False, True], + ], dtype=bool) + Y = np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0]) + nn.fit(X_binary, Y, nb_epoch=10) + Y_pred = nn.predict(X_binary) + print(Y) + print(Y_pred) + for (Y_i, Y_pred_i) in zip(Y, Y_pred): + assert abs(Y_i - Y_pred_i) <= 0.25, (Y_i, Y_pred_i) diff --git a/test/test_predict.py b/test/test_predict.py new file mode 100644 index 0000000000000000000000000000000000000000..eb07507e0b995459a02d44e7c32054988255ccbc --- /dev/null +++ b/test/test_predict.py @@ -0,0 +1,8 @@ +from mhcflurry import predict + +def test_predict_A1_Titin_epitope(): + result_df = predict(alleles=["HLA-A*01:01"], peptides=["ESDPIVAQY"]) + assert len(result_df) == 1 + row = result_df.ix[0] + ic50 = row["ic50"] + assert ic50 <= 500