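# NOTE(review): stray "Newer" / "Older" lines were here; they look like page-navigation residue rather than code -- confirm and delete.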
import logging
import numpy
import pandas
from ...common import normalize_allele_name
from ...predict import predict
from ..percent_rank_transform import PercentRankTransform
from .presentation_component_model import PresentationComponentModel
class MHCflurryReleased(PresentationComponentModel):
    """
    Final model input that uses the standard downloaded MHCflurry models.

    Parameters
    ------------
    experiment_to_alleles : dict: string -> string list
        Normalized allele names for each experiment.

    random_peptides_for_percent_rank : list of string
        If specified, then percentile rank will be calibrated and emitted
        using the given peptides.

    predictor : Class1EnsembleMultiAllelePredictor-like object
        Predictor to use. It is forwarded unchanged as the ``predictor``
        keyword of ``predict``; presumably ``None`` selects that function's
        default models -- TODO confirm.

    predictor_name : string
        Prefix used to build the names of the emitted columns.
        NOTE(review): the default value and the column suffixes were not
        visible in the reviewed text and are reconstructed -- confirm them
        against the consumers of this component.

    **kwargs
        Forwarded to ``PresentationComponentModel.__init__``.
    """

    def __init__(
            self,
            experiment_to_alleles,
            random_peptides_for_percent_rank=None,
            predictor=None,
            predictor_name="mhcflurry_released",
            **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        self.experiment_to_alleles = experiment_to_alleles
        self.predictor = predictor
        self.predictor_name = predictor_name
        if random_peptides_for_percent_rank is None:
            # Percentile rank output is disabled; only affinities are emitted.
            self.percent_rank_transforms = None
            self.random_peptides_for_percent_rank = None
        else:
            # Per-allele transforms are fitted lazily and cached by allele.
            self.percent_rank_transforms = {}
            self.random_peptides_for_percent_rank = numpy.array(
                random_peptides_for_percent_rank)

    def _affinity_column(self):
        """Name of the min-across-alleles affinity column."""
        return self.predictor_name + '_affinity'

    def _percentile_rank_column(self):
        """Name of the min-across-alleles percentile rank column."""
        return self.predictor_name + '_percentile_rank'

    def column_names(self):
        """
        Return the list of column names this component emits.

        The percentile rank column is included only when random peptides
        for calibration were supplied at construction time. The names match
        the keys of the dict returned by ``predict_min_across_alleles``.
        """
        columns = [self._affinity_column()]
        if self.percent_rank_transforms is not None:
            columns.append(self._percentile_rank_column())
        return columns

    def requires_fitting(self):
        """Return False: this component declares that it needs no fitting."""
        return False

    def fit_percentile_rank_if_needed(self, alleles):
        """
        Fit and cache a PercentRankTransform for each allele lacking one.

        Each transform is fitted on the predictions for the random peptides
        given at construction time. Must only be called when percentile
        rank calibration is enabled (``percent_rank_transforms`` is a dict).

        Parameters
        ------------
        alleles : iterable of string
            Allele names, used as given as the cache keys.
        """
        for allele in alleles:
            if allele in self.percent_rank_transforms:
                continue
            logging.info('fitting percent rank for allele: %s', allele)
            # Calibrate with the same predictor that produces the scores
            # being transformed, so ranks and affinities stay comparable.
            calibration_predictions = predict(
                [allele],
                self.random_peptides_for_percent_rank,
                predictor=self.predictor).Prediction.values
            transform = PercentRankTransform()
            transform.fit(calibration_predictions)
            self.percent_rank_transforms[allele] = transform

    def predict_min_across_alleles(self, alleles, peptides):
        """
        Predict for every allele and reduce to the minimum per peptide.

        Parameters
        ------------
        alleles : iterable of string
            Allele names; each is passed through ``normalize_allele_name``.

        peptides : list of string
            Peptides to predict. Duplicates are allowed.

        Returns
        ------------
        dict : string -> numpy.array
            One entry per name in ``column_names()``, each array aligned
            with the order of ``peptides``.
        """
        # De-duplicate: two spellings that normalize to the same allele
        # would otherwise yield duplicate (Peptide, Allele) pairs, which
        # DataFrame.pivot rejects. The per-peptide minimum is unaffected.
        alleles = sorted(set(
            normalize_allele_name(allele) for allele in alleles))

        df = predict(
            alleles,
            numpy.unique(numpy.array(peptides)),
            predictor=self.predictor)

        # Rows: unique peptides; columns: alleles; cells: predictions.
        pivoted = df.pivot(
            index='Peptide', columns='Allele', values='Prediction')

        # reindex (instead of the removed ``.ix``) expands the unique-peptide
        # index back to the caller's order, repeating rows for duplicates.
        result = {
            self._affinity_column(): (
                pivoted.min(axis=1).reindex(peptides).values),
        }

        if self.percent_rank_transforms is not None:
            self.fit_percentile_rank_if_needed(alleles)
            percentile_ranks = pandas.DataFrame(index=pivoted.index)
            for allele in alleles:
                percentile_ranks[allele] = (
                    self.percent_rank_transforms[allele]
                    .transform(pivoted[allele].values))
            result[self._percentile_rank_column()] = (
                percentile_ranks.min(axis=1).reindex(peptides).values)
        return result

    def predict_for_experiment(self, experiment_name, peptides):
        """
        Predict for the alleles registered under ``experiment_name``.

        Looks the experiment up in ``experiment_to_alleles`` (raising
        KeyError if it is absent) and delegates to
        ``predict_min_across_alleles``.
        """
        alleles = self.experiment_to_alleles[experiment_name]
        return self.predict_min_across_alleles(alleles, peptides)