Skip to content
Snippets Groups Projects
Commit 72362eb2 authored by Tim O'Donnell
Browse files

README update (mhctools mention) and percentile rank tweaks

parent 2b2ce0ce
No related branches found
No related tags found
No related merge requests found
...@@ -123,6 +123,24 @@ Run `mhcflurry-predict -h` for details. ...@@ -123,6 +123,24 @@ Run `mhcflurry-predict -h` for details.
See the [class1_allele_specific_models.ipynb](https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb) See the [class1_allele_specific_models.ipynb](https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb)
notebook for an overview of the Python API, including fitting your own predictors. notebook for an overview of the Python API, including fitting your own predictors.
## Scanning protein sequences for predicted epitopes
The [mhctools](https://github.com/hammerlab/mhctools) package provides support
for scanning protein sequences to find predicted epitopes. It supports MHCflurry
as well as other binding predictors. Here is an example:
```
# First install mhctools if needed:
pip install mhctools
# Now generate predictions for protein sequences in FASTA format:
mhctools \
--mhc-predictor mhcflurry \
--input-fasta-file INPUT.fasta \
--mhc-alleles A02:01,A03:01 \
--extract-subsequences \
--out RESULT.csv
```
## Details on the downloadable models ## Details on the downloadable models
......
...@@ -747,23 +747,25 @@ class Class1AffinityPredictor(object): ...@@ -747,23 +747,25 @@ class Class1AffinityPredictor(object):
df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1)) df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1))
df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1)) df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1))
del df["normalized_allele"]
del df["supported_peptide_length"]
if include_individual_model_predictions: if include_individual_model_predictions:
columns = sorted(df.columns, key=lambda c: c.startswith('model_')) columns = sorted(df.columns, key=lambda c: c.startswith('model_'))
else: else:
columns = [ columns = [
c for c in df.columns if c not in df_predictions.columns c for c in df.columns if c not in df_predictions.columns
] ]
columns.remove("normalized_allele")
columns.remove("supported_peptide_length")
result = df[columns].copy()
if include_percentile_ranks: if include_percentile_ranks:
if self.allele_to_percent_rank_transform: if self.allele_to_percent_rank_transform:
result["prediction_percentile"] = self.percentile_ranks( df["prediction_percentile"] = self.percentile_ranks(
df.prediction, alleles=df.allele.values, throw=throw) df.prediction,
alleles=df.normalized_allele.values,
throw=throw)
columns.append("prediction_percentile")
else: else:
warnings.warn("No percentile rank information available.") warnings.warn("No percentile rank information available.")
return result return df[columns].copy()
@staticmethod @staticmethod
def save_weights(weights_list, filename): def save_weights(weights_list, filename):
......
...@@ -163,6 +163,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): ...@@ -163,6 +163,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
peptides=df.peptide.values, peptides=df.peptide.values,
affinities=df.measurement_value.values, affinities=df.measurement_value.values,
) )
predictor.calibrate_percentile_ranks()
ic50_pred = predictor.predict(df.peptide.values, allele=allele) ic50_pred = predictor.predict(df.peptide.values, allele=allele)
ic50_true = df.measurement_value.values ic50_true = df.measurement_value.values
eq_(len(ic50_pred), len(ic50_true)) eq_(len(ic50_pred), len(ic50_true))
...@@ -175,6 +176,8 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): ...@@ -175,6 +176,8 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
ic50_pred_df = predictor.predict_to_dataframe( ic50_pred_df = predictor.predict_to_dataframe(
df.peptide.values, allele=allele) df.peptide.values, allele=allele)
print(ic50_pred_df) print(ic50_pred_df)
assert 'prediction_percentile' in ic50_pred_df.columns
assert ic50_pred_df.prediction_percentile.isnull().sum() == 0
ic50_pred_df2 = predictor.predict_to_dataframe( ic50_pred_df2 = predictor.predict_to_dataframe(
df.peptide.values, df.peptide.values,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment