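# NOTE: "Newer" / "Older" appear to be page-navigation labels captured along
# with this file by a source viewer; they are not part of the module's code.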
import numpy
numpy.random.seed(0)
import argparse
import cProfile
import collections
import pstats
import sys
import time

import pandas

from mhcflurry import Class1AffinityPredictor
from mhcflurry.common import random_peptides
from mhcflurry.downloads import get_path
from mhcflurry.encodable_sequences import EncodableSequences
from mhcflurry.testing_utils import module_cleanup
# Expose the shared cleanup helper under the name ``teardown``.
# NOTE(review): presumably picked up by the test runner as the module-level
# teardown hook -- confirm against the runner configuration.
teardown = module_cleanup

# Default number of random peptides per bulk prediction. It must exist before
# the test functions are defined (it is evaluated as a default argument) and
# before the argument parser is built.
# NOTE(review): the definition was absent from the visible source; 10000
# mirrors the NUM2 constant used inside the allele-specific benchmark --
# confirm the intended value.
DEFAULT_NUM_PREDICTIONS = 10000

# Predictors are loaded once at import time and reused as the default
# ``predictor`` argument of the benchmarks below.
ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1", "models"))

PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1_pan", "models.with_mass_spec"))
def test_speed_allele_specific(
        profile=False,
        predictor=ALLELE_SPECIFIC_PREDICTOR,
        num=DEFAULT_NUM_PREDICTIONS):
    """
    Time several prediction workloads on the allele-specific predictor.

    Each phase is wall-clock timed with ``time.time()`` under its own label;
    the collected timings are printed as a pandas Series.

    Phases, in order:
      * ``first``: a single-peptide prediction (the first call, timed
        separately from the bulk run).
      * ``pred_<num>``: prediction over ``num`` random peptides.
      * ``encode_blosum_<NUM2>``: BLOSUM62 encoding of NUM2 random 13-mers.
      * ``pred_already_encoded_<NUM2>``: one prediction over those
        already-encoded peptides.
      * ``pred_already_encoded_<NUM2>_<NUM_REPEATS>_times``: the same
        prediction repeated NUM_REPEATS times.

    Parameters
    ----------
    profile : bool
        If True, run a separate ``cProfile.Profile`` around every phase.
    predictor : Class1AffinityPredictor
        Predictor to benchmark.
    num : int
        Number of random peptides for the bulk prediction phase.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per profiled phase; empty when ``profile`` is False.
    """
    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    def start(name):
        # Record the start time first, then (optionally) switch profiling on.
        starts[name] = time.time()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        # Capture elapsed time before stopping the profiler.
        timings[name] = time.time() - starts[name]
        if profile:
            profilers[name].disable()

    # Every start() needs a matching end(); otherwise the phase never shows
    # up in ``timings`` and, when profiling, its profiler is left enabled.
    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    peptides = random_peptides(num)
    start("pred_%d" % num)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_%d" % num)

    NUM2 = 10000
    peptides = EncodableSequences.create(random_peptides(NUM2, length=13))
    start("encode_blosum_%d" % NUM2)
    peptides.variable_length_to_fixed_length_vector_encoding("BLOSUM62")
    end("encode_blosum_%d" % NUM2)

    # The peptides above are now encoded, so this measures prediction alone.
    start("pred_already_encoded_%d" % NUM2)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d" % NUM2)

    NUM_REPEATS = 100
    start("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))
    for _ in range(NUM_REPEATS):
        predictor.predict(peptides, allele="HLA-A*02:01")
    end("pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return dict(
        (key, pstats.Stats(value)) for (key, value) in profilers.items())
# (Stray viewer line numbers 85-134 were captured here; they were not part of
# the module's code and have been dropped.)
def test_speed_pan_allele(
        profile=False,
        predictor=PAN_ALLELE_PREDICTOR,
        num=DEFAULT_NUM_PREDICTIONS):
    """
    Time a single-peptide prediction and a bulk prediction on the pan-allele
    predictor.

    Two phases are wall-clock timed: ``first`` (one peptide) and
    ``pred_<num>`` (``num`` random peptides). The timings are printed as a
    pandas Series.

    Parameters
    ----------
    profile : bool
        If True, wrap each phase in its own ``cProfile.Profile``.
    predictor : Class1AffinityPredictor
        Predictor to benchmark.
    num : int
        Number of random peptides for the bulk phase.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per profiled phase; empty when ``profile`` is False.
    """
    began_at = collections.OrderedDict()
    elapsed = collections.OrderedDict()
    phase_profilers = collections.OrderedDict()

    def begin(label):
        # Timestamp first so profiler construction is inside the timed span,
        # exactly as the phase is measured.
        began_at[label] = time.time()
        if not profile:
            return
        prof = cProfile.Profile()
        phase_profilers[label] = prof
        prof.enable()

    def finish(label):
        elapsed[label] = time.time() - began_at[label]
        if not profile:
            return
        phase_profilers[label].disable()

    allele = "HLA-A*02:01"

    # Phase 1: a lone prediction, timed separately from the bulk run.
    begin("first")
    predictor.predict(["SIINFEKL"], allele=allele)
    finish("first")

    # Phase 2: bulk prediction. Peptides are generated outside the timed span.
    peptides = random_peptides(num)
    bulk_label = "pred_%d" % num
    begin(bulk_label)
    predictor.predict(peptides, allele=allele)
    finish(bulk_label)

    print("SPEED BENCHMARK")
    print("Results:\n%s" % pandas.Series(elapsed))

    return {
        label: pstats.Stats(prof)
        for label, prof in phase_profilers.items()
    }
# Command-line options, used when this module is executed as a script.
_PREDICTOR_KINDS = ["allele-specific", "pan-allele"]

parser = argparse.ArgumentParser(usage=__doc__)

# One or more predictor kinds may be given; by default both are benchmarked.
parser.add_argument(
    "--predictor",
    nargs="+",
    choices=_PREDICTOR_KINDS,
    default=list(_PREDICTOR_KINDS),
    help="Which predictors to run")

# Forwarded to the benchmarks as their ``num`` argument.
parser.add_argument(
    "--num-predictions",
    type=int,
    default=DEFAULT_NUM_PREDICTIONS,
    help="Number of predictions to run")
if __name__ == '__main__':
    # Script mode: run the selected benchmarks with profiling enabled, print
    # the profile of the bulk prediction phase, then drop into a debugger
    # shell so the results can be explored interactively.
    args = parser.parse_args(sys.argv[1:])

    # (command-line choice, banner, benchmark), in execution order.
    benchmark_table = [
        ("allele-specific",
         "Running allele-specific test",
         test_speed_allele_specific),
        ("pan-allele",
         "Running pan-allele test",
         test_speed_pan_allele),
    ]

    # Label under which both benchmarks store the bulk prediction phase.
    stats_key = "pred_%d" % args.num_predictions

    for (choice, banner, benchmark) in benchmark_table:
        if choice not in args.predictor:
            continue
        print(banner)
        result = benchmark(profile=True, num=args.num_predictions)
        bulk_stats = result[stats_key]
        bulk_stats.sort_stats("cumtime").reverse_order().print_stats()

    # Leave in ipython
    # At module scope locals() is the module namespace, so this publishes the
    # last benchmark's profile stats there for the interactive session.
    locals().update(result)
    import ipdb  # pylint: disable=import-error
    ipdb.set_trace()