Newer
Older
import numpy
numpy.random.seed(0)
import argparse
import collections
import time
import cProfile
import pstats
import pandas
from mhcflurry import Class1AffinityPredictor
from mhcflurry.encodable_sequences import EncodableSequences
# Loads the two predictors that the speed tests below read from module scope.
# NOTE(review): no enclosing ``def`` header is visible for these statements;
# they look like the body of a setup-style function -- confirm against the
# full file.
# NOTE(review): the ``global`` statements name
# PAN_ALLELE_PREDICTOR_NO_MASS_SPEC, but the assignment below binds
# PAN_ALLELE_PREDICTOR. Inside a function that assignment would create a local
# rather than update the module-level name -- confirm which name is intended.
global ALLELE_SPECIFIC_PREDICTOR, PAN_ALLELE_PREDICTOR_NO_MASS_SPEC
startup()
# Allele-specific models, located via get_path("models_class1", "models").
ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(
get_path("models_class1", "models"))
# Pan-allele models, located via
# get_path("models_class1_pan", "models.with_mass_spec").
PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
get_path("models_class1_pan", "models.with_mass_spec"))
# NOTE(review): this second ``global`` statement presumably opens a separate
# (teardown-style) function whose header and body are not visible -- verify.
global ALLELE_SPECIFIC_PREDICTOR, PAN_ALLELE_PREDICTOR_NO_MASS_SPEC
def test_speed_allele_specific(profile=False, num=DEFAULT_NUM_PREDICTIONS):
    """
    Time (and optionally profile) the allele-specific predictor.

    Every stage is bracketed by ``start(name)`` / ``end(name)`` so that each
    started timer (and profiler) is also stopped. The stages, in order, are:

    * ``"first"``: a single warm-up prediction for one peptide.
    * ``"pred_<num>"``: prediction for ``random_peptides(num)``.
    * ``"encode_blosum_10000"``: BLOSUM62 fixed-length encoding of 10000
      length-13 peptides wrapped in ``EncodableSequences``.
    * ``"pred_already_encoded_10000"``: one prediction on those peptides
      after they have been encoded.
    * ``"pred_already_encoded_10000_100_times"``: the same prediction
      repeated 100 times.

    The wall-clock timings are printed as a pandas Series.

    Parameters
    ----------
    profile : bool
        If True, also run a separate ``cProfile.Profile`` around each stage.
    num : int
        Number of peptides requested from ``random_peptides`` for the
        ``"pred_<num>"`` stage.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per profiled stage; empty when ``profile`` is False.
    """
    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    # Module-level predictor; only read here, so no ``global`` is required.
    predictor = ALLELE_SPECIFIC_PREDICTOR

    def start(name):
        # Record the wall-clock start and, if requested, begin profiling.
        starts[name] = time.time()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        # Record elapsed time and stop the profiler started for this stage.
        timings[name] = time.time() - starts[name]
        if profile:
            profilers[name].disable()

    # Warm-up call, timed separately from the bulk prediction below.
    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    peptides = random_peptides(num)
    start("pred_%d" % num)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_%d" % num)

    NUM2 = 10000
    peptides = EncodableSequences.create(random_peptides(NUM2, length=13))
    start("encode_blosum_%d" % NUM2)
    peptides.variable_length_to_fixed_length_vector_encoding("BLOSUM62")
    end("encode_blosum_%d" % NUM2)

    # Same EncodableSequences object as above, now already encoded.
    start("pred_already_encoded_%d" % NUM2)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d" % NUM2)

    NUM_REPEATS = 100
    start("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))
    for _ in range(NUM_REPEATS):
        predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return {key: pstats.Stats(value) for (key, value) in profilers.items()}
def test_speed_pan_allele(profile=False, num=DEFAULT_NUM_PREDICTIONS):
    """
    Time (and optionally profile) the pan-allele predictor.

    Two stages are measured, each bracketed by ``start(name)`` /
    ``end(name)``:

    * ``"first"``: a single warm-up prediction for one peptide.
    * ``"pred_<num>"``: prediction for ``random_peptides(num)``.

    The wall-clock timings are printed as a pandas Series.

    Parameters
    ----------
    profile : bool
        If True, also run a separate ``cProfile.Profile`` around each stage.
    num : int
        Number of peptides requested from ``random_peptides`` for the
        ``"pred_<num>"`` stage.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per profiled stage; empty when ``profile`` is False.
    """
    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    # Module-level predictor; only read here, so no ``global`` is required.
    predictor = PAN_ALLELE_PREDICTOR

    def start(name):
        # Record the wall-clock start and, if requested, begin profiling.
        starts[name] = time.time()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        # Record elapsed time and stop the profiler started for this stage.
        timings[name] = time.time() - starts[name]
        if profile:
            profilers[name].disable()

    # Warm-up call, timed separately from the bulk prediction below.
    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    peptides = random_peptides(num)
    start("pred_%d" % num)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_%d" % num)

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return {key: pstats.Stats(value) for (key, value) in profilers.items()}
# Command-line options used when this module is run as a script.
parser = argparse.ArgumentParser(usage=__doc__)

# One or more predictor kinds; both are selected when the flag is omitted.
parser.add_argument(
    "--predictor",
    help="Which predictors to run",
    default=["allele-specific", "pan-allele"],
    choices=["allele-specific", "pan-allele"],
    nargs="+")

# Forwarded as ``num`` to the speed tests.
parser.add_argument(
    "--num-predictions",
    help="Number of predictions to run",
    default=DEFAULT_NUM_PREDICTIONS,
    type=int)
if __name__ == '__main__':
    # If run directly from python, do profiling and leave the user in a shell
    # to explore results.

    # Parse the command line first: every branch below reads ``args``.
    args = parser.parse_args()

    if "allele-specific" in args.predictor:
        print("Running allele-specific test")
        result = test_speed_allele_specific(
            profile=True, num=args.num_predictions)
        # Show the profile of the bulk-prediction stage, ordered by
        # cumulative time with the order reversed.
        result[
            "pred_%d" % args.num_predictions
        ].sort_stats("cumtime").reverse_order().print_stats()

    if "pan-allele" in args.predictor:
        print("Running pan-allele test")
        result = test_speed_pan_allele(
            profile=True, num=args.num_predictions)
        result[
            "pred_%d" % args.num_predictions
        ].sort_stats("cumtime").reverse_order().print_stats()

    # Leave in ipython
    # At module scope locals() is the module namespace, so this exposes the
    # per-stage Stats objects of the last test run to the debugger session.
    locals().update(result)
    import ipdb  # pylint: disable=import-error
    ipdb.set_trace()