"""
Profile prediction speed of the mhcflurry Class1AffinityPredictor models.
"""
import logging
logging.getLogger('tensorflow').disabled = True
logging.getLogger('matplotlib').disabled = True
import numpy
numpy.random.seed(0)
import argparse
import collections
import time
import cProfile
import pstats

import pandas

from mhcflurry import Class1AffinityPredictor
from mhcflurry.common import random_peptides
from mhcflurry.downloads import get_path
from mhcflurry.encodable_sequences import EncodableSequences
from mhcflurry.testing_utils import startup
# Project-provided start-up hook; it runs before any predictor is loaded.
startup()

# Default number of random peptides scored by each benchmark.
# NOTE(review): this name is used by both test signatures and by the
# --num-predictions option, but no definition is visible in this file.
# 10000 mirrors the NUM2 batch size used in the allele-specific test --
# confirm the intended value.
DEFAULT_NUM_PREDICTIONS = 10000

# Predictors are loaded once at import time and shared by the benchmarks.
ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1", "models"))

# NOTE(review): the arguments of this call were missing (the statement ended
# at the opening parenthesis). The download name and sub-directory below are
# assumed from the mhcflurry pan-allele model layout -- confirm.
PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1_pan", "models.combined"))
def test_speed_allele_specific(profile=False, num=DEFAULT_NUM_PREDICTIONS):
    """
    Benchmark the allele-specific predictor.

    Each stage is timed under its own name and the collected timings are
    printed as a pandas Series. The stages are:

    * ``first``: a single-peptide prediction (first call on the predictor)
    * ``pred_<num>``: prediction over `num` random peptides
    * ``encode_blosum_<N>``: BLOSUM62 encoding of N=10000 random 13-mers
    * ``pred_already_encoded_<N>``: one prediction on the encoded peptides
    * ``pred_already_encoded_<N>_<R>_times``: the same prediction repeated
      R=100 times

    Parameters
    ----------
    profile : bool
        If True, additionally run every stage under its own
        ``cProfile.Profile``.
    num : int
        Number of random peptides used for the ``pred_<num>`` stage.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per stage; empty when `profile` is False.
    """
    predictor = ALLELE_SPECIFIC_PREDICTOR

    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    def start(name):
        # perf_counter is monotonic, so timings are immune to clock changes.
        starts[name] = time.perf_counter()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        timings[name] = time.perf_counter() - starts[name]
        if profile:
            profilers[name].disable()

    # Every start() is paired with an end() so that stages (and their
    # profilers) never overlap.
    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    peptides = random_peptides(num)
    start("pred_%d" % num)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_%d" % num)

    NUM2 = 10000
    peptides = EncodableSequences.create(random_peptides(NUM2, length=13))
    start("encode_blosum_%d" % NUM2)
    peptides.variable_length_to_fixed_length_vector_encoding("BLOSUM62")
    end("encode_blosum_%d" % NUM2)

    # The same EncodableSequences object is reused below, after the encoding
    # stage above has already run on it.
    start("pred_already_encoded_%d" % NUM2)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d" % NUM2)

    NUM_REPEATS = 100
    start("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))
    for _ in range(NUM_REPEATS):
        predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return {
        name: pstats.Stats(profiler)
        for (name, profiler) in profilers.items()
    }
def test_speed_pan_allele(profile=False, num=DEFAULT_NUM_PREDICTIONS):
    """
    Benchmark the pan-allele predictor.

    Two stages are timed under their own names and the collected timings are
    printed as a pandas Series:

    * ``first``: a single-peptide prediction (first call on the predictor)
    * ``pred_<num>``: prediction over `num` random peptides

    Parameters
    ----------
    profile : bool
        If True, additionally run every stage under its own
        ``cProfile.Profile``.
    num : int
        Number of random peptides used for the ``pred_<num>`` stage.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per stage; empty when `profile` is False.
    """
    predictor = PAN_ALLELE_PREDICTOR

    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    def start(name):
        # perf_counter is monotonic, so timings are immune to clock changes.
        starts[name] = time.perf_counter()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        timings[name] = time.perf_counter() - starts[name]
        if profile:
            profilers[name].disable()

    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    peptides = random_peptides(num)
    start("pred_%d" % num)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_%d" % num)

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return {
        name: pstats.Stats(profiler)
        for (name, profiler) in profilers.items()
    }
# Command-line options used when this file is run as a script. The module
# docstring doubles as the usage text.
_PREDICTOR_KINDS = ["allele-specific", "pan-allele"]

parser = argparse.ArgumentParser(usage=__doc__)

# One or more predictor kinds may be given; all kinds run by default.
parser.add_argument(
    "--predictor",
    help="Which predictors to run",
    nargs="+",
    choices=_PREDICTOR_KINDS,
    default=list(_PREDICTOR_KINDS))

# Forwarded to the benchmarks as their `num` argument.
parser.add_argument(
    "--num-predictions",
    help="Number of predictions to run",
    type=int,
    default=DEFAULT_NUM_PREDICTIONS)
if __name__ == '__main__':
    # If run directly from python, do profiling and leave the user in a shell
    # to explore results.
    args = parser.parse_args()

    if "allele-specific" in args.predictor:
        print("Running allele-specific test")
        result = test_speed_allele_specific(
            profile=True, num=args.num_predictions)
        # Print the profile of the bulk-prediction stage, ordered by
        # cumulative time and then reversed.
        result[
            "pred_%d" % args.num_predictions
        ].sort_stats("cumtime").reverse_order().print_stats()

    if "pan-allele" in args.predictor:
        print("Running pan-allele test")
        result = test_speed_pan_allele(
            profile=True, num=args.num_predictions)
        result[
            "pred_%d" % args.num_predictions
        ].sort_stats("cumtime").reverse_order().print_stats()

    # Leave in ipython
    # `result` holds the Stats of whichever benchmark ran last. At module
    # scope locals() is the module namespace, so each stage name becomes a
    # global that can be inspected from the debugger prompt.
    locals().update(result)
    import ipdb  # pylint: disable=import-error
    ipdb.set_trace()