diff --git a/README.md b/README.md index bf02a88b45a5e4d4a3de740685a082dfc4c8105c..a4373e2731df3d5308211f075d4dc782e24741fa 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,24 @@ Run `mhcflurry-predict -h` for details. See the [class1_allele_specific_models.ipynb](https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb) notebook for an overview of the Python API, including fitting your own predictors. +## Scanning protein sequences for predicted epitopes + +The [mhctools](https://github.com/hammerlab/mhctools) package provides support +for scanning protein sequences to find predicted epitopes. It supports MHCflurry +as well as other binding predictors. Here is an example: + +``` +# First install mhctools if needed: +pip install mhctools + +# Now generate predictions for protein sequences in FASTA format: +mhctools \ + --mhc-predictor mhcflurry \ + --input-fasta-file INPUT.fasta \ + --mhc-alleles A02:01,A03:01 \ + --extract-subsequences \ + --out RESULT.csv +``` ## Details on the downloadable models diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py index 30f03ea1d0140bb60943dd97434196cbba986fa7..c0ec13fb16f713df5b173e3477e78752df59a4d8 100644 --- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py +++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py @@ -747,23 +747,25 @@ class Class1AffinityPredictor(object): df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1)) df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1)) - del df["normalized_allele"] - del df["supported_peptide_length"] if include_individual_model_predictions: columns = sorted(df.columns, key=lambda c: c.startswith('model_')) else: columns = [ c for c in df.columns if c not in df_predictions.columns ] + columns.remove("normalized_allele") + columns.remove("supported_peptide_length") - result = df[columns].copy() if include_percentile_ranks: if self.allele_to_percent_rank_transform: - result["prediction_percentile"] = self.percentile_ranks( - df.prediction, alleles=df.allele.values, throw=throw) + df["prediction_percentile"] = self.percentile_ranks( + df.prediction, + alleles=df.normalized_allele.values, + throw=throw) + columns.append("prediction_percentile") else: warnings.warn("No percentile rank information available.") - return result + return df[columns].copy() @staticmethod def save_weights(weights_list, filename): diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py index 4d7e67d69c3511e6a9d80ba9515778bc4cd89005..952e484074960f93e1522854be529dd74146b840 100644 --- a/test/test_class1_affinity_predictor.py +++ b/test/test_class1_affinity_predictor.py @@ -163,6 +163,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): peptides=df.peptide.values, affinities=df.measurement_value.values, ) + predictor.calibrate_percentile_ranks() ic50_pred = predictor.predict(df.peptide.values, allele=allele) ic50_true = df.measurement_value.values eq_(len(ic50_pred), len(ic50_true)) @@ -175,6 +176,8 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): ic50_pred_df = predictor.predict_to_dataframe( df.peptide.values, allele=allele) print(ic50_pred_df) + assert 'prediction_percentile' in ic50_pred_df.columns + assert ic50_pred_df.prediction_percentile.isnull().sum() == 0 ic50_pred_df2 = predictor.predict_to_dataframe( df.peptide.values,