Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import logging
import numpy
import pandas
from ...common import normalize_allele_name
from ...predict import predict
from ..percent_rank_transform import PercentRankTransform
from .presentation_component_model import PresentationComponentModel
class MHCflurryReleased(PresentationComponentModel):
    """
    Final model input that uses the standard downloaded MHCflurry models.

    For each peptide, the emitted value is the minimum prediction across the
    alleles of the experiment the peptide belongs to. Optionally a percentile
    rank (calibrated per allele against a set of random peptides) is emitted
    as a second column.

    Parameters
    ------------
    experiment_to_alleles : dict: string -> string list
        Normalized allele names for each experiment.

    random_peptides_for_percent_rank : list of string
        If specified, then percentile rank will be calibrated and emitted
        using the given peptides.

    **kwargs
        Passed through unchanged to PresentationComponentModel.__init__.
    """

    def __init__(
            self,
            experiment_to_alleles,
            random_peptides_for_percent_rank=None,
            **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        self.experiment_to_alleles = experiment_to_alleles
        if random_peptides_for_percent_rank is None:
            # Percentile rank output is disabled; column_names() and
            # predict_min_across_alleles() both key off this being None.
            self.percent_rank_transforms = None
            self.random_peptides_for_percent_rank = None
        else:
            # Per-allele cache of fitted transforms, filled lazily by
            # fit_percentile_rank_if_needed().
            self.percent_rank_transforms = {}
            self.random_peptides_for_percent_rank = numpy.array(
                random_peptides_for_percent_rank)

    def column_names(self):
        """
        Names of the values produced by this component.

        Returns
        ------------
        list of string
            Always contains 'mhcflurry_released_affinity'; additionally
            contains 'mhcflurry_released_percentile_rank' when percentile
            rank calibration was requested at construction time.
        """
        columns = ['mhcflurry_released_affinity']
        if self.percent_rank_transforms is not None:
            columns.append('mhcflurry_released_percentile_rank')
        return columns

    def requires_fitting(self):
        """
        This component has nothing to fit, so this always returns False.
        """
        return False

    def fit_percentile_rank_if_needed(self, alleles):
        """
        Fit and cache a PercentRankTransform for every allele that does not
        have one yet.

        Only meaningful when percentile rank calibration was enabled at
        construction time (i.e. self.percent_rank_transforms is a dict).

        Parameters
        ------------
        alleles : list of string
            Allele names, in the same form used as keys of the cache.
        """
        for allele in alleles:
            if allele in self.percent_rank_transforms:
                continue
            logging.info('fitting percent rank for allele: %s', allele)
            random_predictions = predict(
                [allele],
                self.random_peptides_for_percent_rank).Prediction.values
            transform = PercentRankTransform()
            transform.fit(random_predictions)
            # Cache only after a successful fit, so that a failure above
            # cannot leave an unfitted transform behind for later calls.
            self.percent_rank_transforms[allele] = transform

    def predict_min_across_alleles(self, alleles, peptides):
        """
        Predict each peptide against every allele and keep the minimum.

        Parameters
        ------------
        alleles : list of string
            Allele names; each is passed through normalize_allele_name.

        peptides : list of string
            Peptides to predict. May contain repeats.

        Returns
        ------------
        dict: string -> numpy.array
            Keys are the names given by column_names(). Each array has one
            entry per element of `peptides`, in the same order.
        """
        alleles = [normalize_allele_name(allele) for allele in alleles]
        peptides = numpy.asarray(peptides)

        # Predict each distinct peptide once; results are expanded back to
        # the caller's (possibly repeated) peptides further down.
        df = predict(alleles, numpy.unique(peptides))

        # One row per peptide, one column per allele. Selecting the
        # 'Prediction' values explicitly keeps any other column of the
        # prediction frame out of the minimum taken below.
        pivoted = df.pivot(
            index='Peptide', columns='Allele', values='Prediction')

        # Series.ix has been removed from pandas; reindex() performs the same
        # label-based alignment and supports repeated labels.
        result = {
            'mhcflurry_released_affinity': (
                pivoted.min(axis=1).reindex(peptides).values)
        }
        if self.percent_rank_transforms is not None:
            self.fit_percentile_rank_if_needed(alleles)
            percentile_ranks = pandas.DataFrame(index=pivoted.index)
            for allele in alleles:
                percentile_ranks[allele] = (
                    self.percent_rank_transforms[allele]
                    .transform(pivoted[allele].values))
            result['mhcflurry_released_percentile_rank'] = (
                percentile_ranks.min(axis=1).reindex(peptides).values)
        return result

    def predict_for_experiment(self, experiment_name, peptides):
        """
        Predict the given peptides using the alleles of one experiment.

        Parameters
        ------------
        experiment_name : string
            Key into experiment_to_alleles.

        peptides : list of string

        Returns
        ------------
        dict: string -> numpy.array
            See predict_min_across_alleles.
        """
        alleles = self.experiment_to_alleles[experiment_name]
        return self.predict_min_across_alleles(alleles, peptides)