From 72362eb267c5e44665aef2f40ca1a0e5e74b50ca Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 25 Nov 2017 10:12:23 -0500
Subject: [PATCH] README update (mhctools mention) and percentile rank tweaks

---
 README.md                                      | 18 ++++++++++++++++++
 .../class1_affinity_predictor.py               | 14 ++++++++------
 test/test_class1_affinity_predictor.py         |  3 +++
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index bf02a88b..a4373e27 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,24 @@ Run `mhcflurry-predict -h` for details.
 See the [class1_allele_specific_models.ipynb](https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb)
 notebook for an overview of the Python API, including fitting your own predictors.
 
+## Scanning protein sequences for predicted epitopes
+
+The [mhctools](https://github.com/hammerlab/mhctools) package provides support
+for scanning protein sequences to find predicted epitopes. It supports MHCflurry
+as well as other binding predictors. Here is an example:
+
+```
+# First install mhctools if needed:
+pip install mhctools
+
+# Now generate predictions for protein sequences in FASTA format:
+mhctools \
+    --mhc-predictor mhcflurry \
+    --input-fasta-file INPUT.fasta \
+    --mhc-alleles A02:01,A03:01 \
+    --extract-subsequences \
+    --out RESULT.csv
+```
 
 ## Details on the downloadable models
 
diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
index 30f03ea1..c0ec13fb 100644
--- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
@@ -747,23 +747,25 @@ class Class1AffinityPredictor(object):
         df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1))
         df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1))
 
-        del df["normalized_allele"]
-        del df["supported_peptide_length"]
         if include_individual_model_predictions:
             columns = sorted(df.columns, key=lambda c: c.startswith('model_'))
         else:
             columns = [
                 c for c in df.columns if c not in df_predictions.columns
             ]
+        columns.remove("normalized_allele")
+        columns.remove("supported_peptide_length")
 
-        result = df[columns].copy()
         if include_percentile_ranks:
             if self.allele_to_percent_rank_transform:
-                result["prediction_percentile"] = self.percentile_ranks(
-                    df.prediction, alleles=df.allele.values, throw=throw)
+                df["prediction_percentile"] = self.percentile_ranks(
+                    df.prediction,
+                    alleles=df.normalized_allele.values,
+                    throw=throw)
+                columns.append("prediction_percentile")
             else:
                 warnings.warn("No percentile rank information available.")
-        return result
+        return df[columns].copy()
 
     @staticmethod
     def save_weights(weights_list, filename):
diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py
index 4d7e67d6..952e4840 100644
--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -163,6 +163,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
         peptides=df.peptide.values,
         affinities=df.measurement_value.values,
     )
+    predictor.calibrate_percentile_ranks()
     ic50_pred = predictor.predict(df.peptide.values, allele=allele)
     ic50_true = df.measurement_value.values
     eq_(len(ic50_pred), len(ic50_true))
@@ -175,6 +176,8 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
     ic50_pred_df = predictor.predict_to_dataframe(
         df.peptide.values, allele=allele)
     print(ic50_pred_df)
+    assert 'prediction_percentile' in ic50_pred_df.columns
+    assert ic50_pred_df.prediction_percentile.isnull().sum() == 0
 
     ic50_pred_df2 = predictor.predict_to_dataframe(
         df.peptide.values,
-- 
GitLab