import numpy
import pandas

from six import string_types

from .hyperparameters import HyperparameterDefaults
from .class1_neural_network import Class1NeuralNetwork, DEFAULT_PREDICT_BATCH_SIZE
from .encodable_sequences import EncodableSequences
from .regression_target import from_ic50, to_ic50
from .random_negative_peptides import RandomNegativePeptides
from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding
from .auxiliary_input import AuxiliaryInputEncoder


class Class1LigandomePredictor(object):
    def __init__(self, class1_ligandome_neural_networks, allele_to_sequence):
        self.networks = class1_ligandome_neural_networks
        self.allele_to_sequence = allele_to_sequence

    @property
    def max_alleles(self):
        max_alleles = self.networks[0].hyperparameters['max_alleles']
        assert all(
            n.hyperparameters['max_alleles'] == max_alleles
            for n in self.networks)
        return max_alleles

    def predict(self, peptides, alleles, batch_size=DEFAULT_PREDICT_BATCH_SIZE):
        return self.predict_to_dataframe(
            peptides=peptides,
            alleles=alleles,
            batch_size=batch_size).score.values

    def predict_to_dataframe(
            self,
            peptides,
            alleles,
            include_details=False,
            batch_size=DEFAULT_PREDICT_BATCH_SIZE):
        """
        Predict scores and affinities for each peptide across a genotype
        (list of up to max_alleles alleles). Returns a pandas.DataFrame with
        one row per peptide.
        """
        if isinstance(peptides, string_types):
            raise TypeError("peptides must be a list or array, not a string")
        if isinstance(alleles, string_types):
            raise TypeError("alleles must be a list or array, not a string")
        if len(alleles) > self.max_alleles:
            raise ValueError(
                "When alleles is a list, it must have at most %d elements. "
                "These alleles are taken to be a genotype for an "
                "individual, and the strongest prediction across alleles "
                "will be taken for each peptide. Note that this differs "
                "from Class1AffinityPredictor.predict(), where alleles "
                "is expected to be the same length as peptides."
                % self.max_alleles)
        peptides = EncodableSequences.create(peptides)
        alleles = MultipleAlleleEncoding(
            experiment_names=numpy.tile("experiment", len(peptides)),
            experiment_to_allele_list={
                "experiment": alleles,
            },
            allele_to_sequence=self.allele_to_sequence,
            max_alleles_per_experiment=self.max_alleles)
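        # Run every network in the ensemble. Each network scores each peptide
        # against each of the (up to max_alleles) alleles; the per-network
        # results are stacked so they can be averaged below.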
        score_array = []
        affinity_array = []
        for network in self.networks:
            predictions = network.predict(
                peptides=peptides,
                allele_encoding=alleles,
                batch_size=batch_size)
            score_array.append(predictions.score)
            affinity_array.append(predictions.affinity)
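        # Average predictions across the ensemble, then pick the best-scoring
        # allele for each peptide.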
        score_array = numpy.array(score_array)
        affinity_array = numpy.array(affinity_array)
        ensemble_scores = numpy.mean(score_array, axis=0)
        ensemble_affinity = numpy.mean(affinity_array, axis=0)
        top_allele_index = numpy.argmax(ensemble_scores, axis=-1)
        peptide_indices = numpy.arange(len(top_allele_index))
        top_score = ensemble_scores[peptide_indices, top_allele_index]
        top_affinity = ensemble_affinity[peptide_indices, top_allele_index]
        result_df = pandas.DataFrame({"peptide": peptides.sequences})
        result_df["allele"] = alleles.alleles[peptide_indices, top_allele_index]
        result_df["score"] = top_score
        result_df["affinity"] = to_ic50(top_affinity)
        if include_details:
            for i in range(self.max_alleles):
                result_df["allele%d" % (i + 1)] = alleles.alleles[:, i]
                result_df["allele%d score" % (i + 1)] = ensemble_scores[:, i]
                result_df["allele%d score low" % (i + 1)] = numpy.percentile(
                    score_array[:, :, i], 5.0, axis=0)
                result_df["allele%d score high" % (i + 1)] = numpy.percentile(
                    score_array[:, :, i], 95.0, axis=0)
                result_df["allele%d affinity" % (i + 1)] = to_ic50(
                    ensemble_affinity[:, i])
                result_df["allele%d affinity low" % (i + 1)] = numpy.percentile(
                    affinity_array[:, :, i], 5.0, axis=0)
                result_df["allele%d affinity high" % (i + 1)] = numpy.percentile(
                    affinity_array[:, :, i], 95.0, axis=0)
        return result_df
# TODO: implement saving and loading
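# Example usage (sketch). `networks` (a list of trained ligandome neural
# networks) and `allele_to_sequence` (a dict mapping allele names to
# pseudosequences) are assumed to be loaded elsewhere, since saving/loading is
# not implemented yet (see TODO above):
#
#     predictor = Class1LigandomePredictor(networks, allele_to_sequence)
#     result_df = predictor.predict_to_dataframe(
#         peptides=["SIINFEKL", "SIINFEKD"],
#         alleles=["HLA-A*02:01", "HLA-B*07:02"],
#         include_details=True)
#     scores = predictor.predict(
#         peptides=["SIINFEKL", "SIINFEKD"],
#         alleles=["HLA-A*02:01", "HLA-B*07:02"])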