Skip to content
Snippets Groups Projects
test_speed.py 4.42 KiB
Newer Older
Tim O'Donnell's avatar
Tim O'Donnell committed
"""
Profile prediction speed

"""
Tim O'Donnell's avatar
Tim O'Donnell committed
import numpy
numpy.random.seed(0)
import time
import cProfile
import pstats
Tim O'Donnell's avatar
Tim O'Donnell committed
import collections
Tim O'Donnell's avatar
Tim O'Donnell committed
import argparse
import sys
Tim O'Donnell's avatar
Tim O'Donnell committed

import pandas

from mhcflurry import Class1AffinityPredictor
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.encodable_sequences import EncodableSequences
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.common import random_peptides
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.downloads import get_path

from mhcflurry.testing_utils import module_cleanup
# Expose the shared cleanup helper under the module-level name `teardown`.
# NOTE(review): presumably this is picked up by the test runner as the
# module-level teardown hook -- confirm against the runner in use.
teardown = module_cleanup

Tim O'Donnell's avatar
Tim O'Donnell committed
# Both predictors are loaded once, at import time, i.e. outside of any timed
# section below. They serve as the default `predictor` arguments of the two
# speed tests in this module.
ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1", "models"))

# Loaded from the "models.with_mass_spec" entry of "models_class1_pan".
PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
    get_path("models_class1_pan", "models.with_mass_spec"))
Tim O'Donnell's avatar
Tim O'Donnell committed
# Default number of random peptides used for the bulk "pred_<num>" timing in
# both speed tests; also the default of the --num-predictions option.
DEFAULT_NUM_PREDICTIONS = 10000
Tim O'Donnell's avatar
Tim O'Donnell committed
def test_speed_allele_specific(
        profile=False,
        predictor=ALLELE_SPECIFIC_PREDICTOR,
        num=DEFAULT_NUM_PREDICTIONS):
    """
    Time allele-specific predictions in several scenarios and print the
    wall-clock results.

    The timed sections, in order, are:

    * ``first``: a single-peptide prediction.
    * ``pred_<num>``: prediction for `num` random peptides.
    * ``encode_blosum_<n>``: BLOSUM62 encoding of n=10000 random 13-mers
      wrapped in an `EncodableSequences`.
    * ``pred_already_encoded_<n>``: prediction on that same
      `EncodableSequences` object.
    * ``pred_already_encoded_<n>_<r>_times``: the previous prediction
      repeated r=100 times.

    All predictions use allele ``HLA-A*02:01``.

    Parameters
    ----------
    profile : bool
        If True, each timed section additionally runs under its own
        `cProfile.Profile`.
    predictor : object
        Predictor whose ``predict(peptides, allele=...)`` method is timed.
    num : int
        Number of random peptides for the ``pred_<num>`` section.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per timed section, keyed by section name. Empty when
        `profile` is False.
    """
    starts = collections.OrderedDict()
    timings = collections.OrderedDict()
    profilers = collections.OrderedDict()

    def start(name):
        # Open the timed section `name` (and its profiler, if requested).
        starts[name] = time.time()
        if profile:
            profilers[name] = cProfile.Profile()
            profilers[name].enable()

    def end(name):
        # Close the timed section `name`. The elapsed time is recorded
        # before the profiler is switched off.
        timings[name] = time.time() - starts[name]
        if profile:
            profilers[name].disable()

    # Single prediction, timed separately from the bulk runs below.
    start("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    end("first")

    # Bulk prediction; generating the peptides is not part of the timing.
    peptides = random_peptides(num)
    bulk_section = "pred_%d" % num
    start(bulk_section)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end(bulk_section)

    # Encoding and prediction on an EncodableSequences object. The size of
    # this part is fixed and independent of `num`.
    NUM2 = 10000
    peptides = EncodableSequences.create(random_peptides(NUM2, length=13))
    encode_section = "encode_blosum_%d" % NUM2
    start(encode_section)
    # The return value is discarded.
    # NOTE(review): presumably the encoding is cached on `peptides`, so the
    # predictions below can reuse it -- confirm in EncodableSequences.
    peptides.variable_length_to_fixed_length_vector_encoding("BLOSUM62")
    end(encode_section)

    encoded_section = "pred_already_encoded_%d" % NUM2
    start(encoded_section)
    predictor.predict(peptides, allele="HLA-A*02:01")
    end(encoded_section)

    NUM_REPEATS = 100
    repeated_section = (
        "pred_already_encoded_%d_%d_times" % (NUM2, NUM_REPEATS))
    start(repeated_section)
    for _ in range(NUM_REPEATS):
        predictor.predict(peptides, allele="HLA-A*02:01")
    end(repeated_section)

    print("SPEED BENCHMARK")
    print("Results:\n%s" % str(pandas.Series(timings)))

    return dict(
        (key, pstats.Stats(value)) for (key, value) in profilers.items())


Tim O'Donnell's avatar
Tim O'Donnell committed
def test_speed_pan_allele(
        profile=False,
        predictor=PAN_ALLELE_PREDICTOR,
        num=DEFAULT_NUM_PREDICTIONS):
    """
    Time pan-allele predictions and print the wall-clock results.

    Two sections are timed, both for allele ``HLA-A*02:01``: ``first`` (a
    single-peptide prediction) and ``pred_<num>`` (prediction for `num`
    random peptides).

    Parameters
    ----------
    profile : bool
        If True, each timed section additionally runs under its own
        `cProfile.Profile`.
    predictor : object
        Predictor whose ``predict(peptides, allele=...)`` method is timed.
    num : int
        Number of random peptides for the ``pred_<num>`` section.

    Returns
    -------
    dict of str -> pstats.Stats
        One entry per timed section, keyed by section name. Empty when
        `profile` is False.
    """
    began_at = collections.OrderedDict()
    elapsed = collections.OrderedDict()
    section_profilers = collections.OrderedDict()

    def begin(label):
        # Record the start time first, then attach a profiler if requested.
        began_at[label] = time.time()
        if not profile:
            return
        section_profilers[label] = cProfile.Profile()
        section_profilers[label].enable()

    def finish(label):
        # The elapsed time is taken before the profiler is switched off.
        elapsed[label] = time.time() - began_at[label]
        if not profile:
            return
        section_profilers[label].disable()

    # Single prediction, timed separately from the bulk run.
    begin("first")
    predictor.predict(["SIINFEKL"], allele="HLA-A*02:01")
    finish("first")

    # Bulk prediction; peptide generation happens outside the timed section.
    peptides = random_peptides(num)
    bulk_label = "pred_%d" % num
    begin(bulk_label)
    predictor.predict(peptides, allele="HLA-A*02:01")
    finish(bulk_label)

    print("SPEED BENCHMARK")
    report = str(pandas.Series(elapsed))
    print("Results:\n%s" % report)

    stats_by_section = {}
    for label, profiler in section_profilers.items():
        stats_by_section[label] = pstats.Stats(profiler)
    return stats_by_section


# Command-line interface. The parser is built at import time but only used
# when the module is run as a script. The module docstring doubles as the
# usage text.
parser = argparse.ArgumentParser(usage=__doc__)
# One or more predictor kinds to benchmark; both are run by default.
parser.add_argument(
    "--predictor",
    nargs="+",
    choices=["allele-specific", "pan-allele"],
    default=["allele-specific", "pan-allele"],
    help="Which predictors to run")

# Passed to the selected speed tests as their `num` argument.
parser.add_argument(
    "--num-predictions",
    type=int,
    default=DEFAULT_NUM_PREDICTIONS,
    help="Number of predictions to run")

Tim O'Donnell's avatar
Tim O'Donnell committed
if __name__ == '__main__':
    # If run directly from python, do profiling and leave the user in a shell
    # to explore results.
    args = parser.parse_args(sys.argv[1:])

    # Profiling results of every benchmark that was run, keyed by predictor
    # name. Both benchmarks return dicts with the same section names
    # ("first", "pred_<num>"), so they are kept apart here; otherwise the
    # second run would replace the first one's profiles.
    results = collections.OrderedDict()

    benchmarks = [
        ("allele-specific", test_speed_allele_specific),
        ("pan-allele", test_speed_pan_allele),
    ]
    for (predictor_name, benchmark) in benchmarks:
        if predictor_name not in args.predictor:
            continue
        print("Running %s test" % predictor_name)
        result = benchmark(profile=True, num=args.num_predictions)
        results[predictor_name] = result
        # Show the profile of the bulk prediction section.
        result[
            "pred_%d" % args.num_predictions
        ].sort_stats("cumtime").reverse_order().print_stats()

    # Leave in ipython
    # As before, the sections of the last benchmark run are exposed as
    # top-level names; `results` gives access to all runs.
    locals().update(result)
    import ipdb  # pylint: disable=import-error
    ipdb.set_trace()