README update (mhctools mention) and percentile rank tweaks

72362eb2 · Tim O'Donnell · 2b2ce0ce · 72362eb2 · 72362eb2 · 72362eb2
Commit 72362eb2 authored 7 years ago by Tim O'Donnell
--- a/README.md
+++ b/README.md
@@ -123,6 +123,24 @@ Run `mhcflurry-predict -h` for details.
 See the [class1_allele_specific_models.ipynb](https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb)
 notebook for an overview of the Python API, including fitting your own predictors.
+## Scanning protein sequences for predicted epitopes
+The [mhctools](https://github.com/hammerlab/mhctools) package provides support
+for scanning protein sequences to find predicted epitopes. It supports MHCflurry
+as well as other binding predictors. Here is an example:
+```
+# First install mhctools if needed:
+pip install mhctools
+# Now generate predictions for protein sequences in FASTA format:
+mhctools \
+    --mhc-predictor mhcflurry \
+    --input-fasta-file INPUT.fasta \
+    --mhc-alleles A02:01,A03:01 \
+    --extract-subsequences \
+    --out RESULT.csv
+```
 ## Details on the downloadable models

--- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
@@ -747,23 +747,25 @@ class Class1AffinityPredictor(object):
        df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1))
        df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1))
-        del df["normalized_allele"]
-        del df["supported_peptide_length"]
        if include_individual_model_predictions:
            columns = sorted(df.columns, key=lambda c: c.startswith('model_'))
        else:
            columns = [
                c for c in df.columns if c not in df_predictions.columns
            ]
+        columns.remove("normalized_allele")
+        columns.remove("supported_peptide_length")
-        result = df[columns].copy()
        if include_percentile_ranks:
            if self.allele_to_percent_rank_transform:
-                result["prediction_percentile"] = self.percentile_ranks(
+                df["prediction_percentile"] = self.percentile_ranks(
-                    df.prediction, alleles=df.allele.values, throw=throw)
+                    df.prediction,
+                    alleles=df.normalized_allele.values,
+                    throw=throw)
+                columns.append("prediction_percentile")
            else:
                warnings.warn("No percentile rank information available.")
-        return result
+        return df[columns].copy()
    @staticmethod
    def save_weights(weights_list, filename):

--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -163,6 +163,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
        peptides=df.peptide.values,
        affinities=df.measurement_value.values,
    )
+    predictor.calibrate_percentile_ranks()
    ic50_pred = predictor.predict(df.peptide.values, allele=allele)
    ic50_true = df.measurement_value.values
    eq_(len(ic50_pred), len(ic50_true))
@@ -175,6 +176,8 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
    ic50_pred_df = predictor.predict_to_dataframe(
        df.peptide.values, allele=allele)
    print(ic50_pred_df)
+    assert 'prediction_percentile' in ic50_pred_df.columns
+    assert ic50_pred_df.prediction_percentile.isnull().sum() == 0
    ic50_pred_df2 = predictor.predict_to_dataframe(
        df.peptide.values,