From ebc915a664016650414fa3d55c3e241fb35b5d2f Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 27 Jan 2018 19:13:13 -0500
Subject: [PATCH] memory usage hack

---
 mhcflurry/class1_affinity_predictor.py          | 75 ++-----------------
 .../train_allele_specific_models_command.py     |  8 +-
 2 files changed, 9 insertions(+), 74 deletions(-)

diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 10ac0e55..aee7d2e5 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -584,76 +584,6 @@ class Class1AffinityPredictor(object):
 
         return models
 
-    def calibrate_percentile_ranks(
-            self,
-            peptides=None,
-            num_peptides_per_length=int(1e5),
-            alleles=None,
-            bins=None,
-            quiet=False,
-            worker_pool=None):
-        """
-        Compute the cumulative distribution of ic50 values for a set of alleles
-        over a large universe of random peptides, to enable computing quantiles in
-        this distribution later.
-
-        Parameters
-        ----------
-        peptides : sequence of string, optional
-            Peptides to use
-        num_peptides_per_length : int, optional
-            If peptides argument is not specified, then num_peptides_per_length
-            peptides are randomly sampled from a uniform distribution for each
-            supported length
-        alleles : sequence of string, optional
-            Alleles to perform calibration for. If not specified all supported
-            alleles will be calibrated.
-        bins : object
-            Anything that can be passed to numpy.histogram's "bins" argument
-            can be used here, i.e. either an integer or a sequence giving bin
-            edges. This is in ic50 space.
-        quiet : boolean
-            If False (default), status updates will be printed to stdout.
-        """
-        if bins is None:
-            bins = to_ic50(numpy.linspace(1, 0, 1000))
-
-        if alleles is None:
-            alleles = self.supported_alleles
-
-        if peptides is None:
-            peptides = []
-            lengths = range(
-                self.supported_peptide_lengths[0],
-                self.supported_peptide_lengths[1] + 1)
-            for length in lengths:
-                peptides.extend(
-                    random_peptides(num_peptides_per_length, length))
-
-
-
-        if quiet:
-            def msg(s):
-                pass
-        else:
-            def msg(s):
-                print(s)
-                sys.stdout.flush()
-
-        encoded_peptides = EncodableSequences.create(peptides)
-        for (i, allele) in enumerate(alleles):
-            msg("Calibrating percentile ranks for allele %03d/%03d: %s" % (
-                i + 1, len(alleles), allele))
-            start = time.time()
-            predictions = self.predict(encoded_peptides, allele=allele)
-            msg("Generated %d predictions in %0.2f sec." % (
-                len(predictions), time.time() - start))
-            transform = PercentRankTransform()
-            transform.fit(predictions, bins=bins)
-            self.allele_to_percent_rank_transform[allele] = transform
-            msg("Done calibrating allele %s in %0.2f sec." % (
-                allele, time.time() - start))
-
     def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True):
         """
         Return percentile ranks for the given ic50 affinities and alleles.
@@ -1003,6 +933,11 @@ class Class1AffinityPredictor(object):
 
         if worker_pool and len(alleles) > 1:
             # Run in parallel
+
+            # Performance hack.
+            for network in self.neural_networks:
+                network.peptides_to_network_input(encoded_peptides)
+
             do_work = partial(
                 _calibrate_percentile_ranks,
                 predictor=self,
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index 82763d6e..dfd77d4c 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -223,8 +223,8 @@ def run(argv=sys.argv[1:]):
         print("*" * 30)
 
     training_time = time.time() - start
-    print("Trained affinity predictor with %d networks in %0.2f sec." % (
-        len(predictor.neural_networks), training_time))
+    print("Trained affinity predictor with %d networks in %0.2f min." % (
+        len(predictor.neural_networks), training_time / 60.0))
     print("*" * 30)
 
     if args.percent_rank_calibration_num_peptides_per_length > 0:
@@ -242,8 +242,8 @@ def run(argv=sys.argv[1:]):
         worker_pool.close()
         worker_pool.join()
 
-    print("Train time: %0.2f sec. Percent rank calibration time: %0.2f sec." % (
-        training_time, percent_rank_calibration_time))
+    print("Train time: %0.2f min. Percent rank calibration time: %0.2f min." % (
+        training_time / 60.0, percent_rank_calibration_time / 60.0))
     print("Predictor written to: %s" % args.out_models_dir)
 
 
--
GitLab
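
The added loop over self.neural_networks appears to exist so that each network's peptide-to-input encoding is computed once in the parent process, before calibration work is handed to the worker pool; forked workers then inherit the already-encoded data rather than each recomputing it (consistent with the commit title, "memory usage hack"). Below is a minimal sketch of that warm-the-cache-then-fork pattern only; it is not mhcflurry code, the names (TinyModel, calibrate_one) are hypothetical, and it assumes a fork-based multiprocessing start method on a Unix-like system.

    from multiprocessing import get_context


    class TinyModel:
        """Stand-in for a neural network whose input encoding is expensive."""

        def __init__(self):
            self._encoding_cache = {}

        def encode(self, peptides):
            key = tuple(peptides)
            if key not in self._encoding_cache:
                # Placeholder for the expensive peptide -> network-input transform.
                self._encoding_cache[key] = [p.upper() for p in peptides]
            return self._encoding_cache[key]

        def predict(self, peptides):
            return [float(len(p)) for p in self.encode(peptides)]


    MODELS = [TinyModel() for _ in range(4)]
    PEPTIDES = ["SIINFEKL", "KLGVNMMK", "AAAWWWAA"]


    def calibrate_one(model_index):
        # Runs in a forked worker: the caches warmed by the parent are inherited
        # copy-on-write, so nothing is re-encoded (or duplicated) here.
        return MODELS[model_index].predict(PEPTIDES)


    if __name__ == "__main__":
        # The "performance hack": warm every model's encoding cache before forking.
        for model in MODELS:
            model.encode(PEPTIDES)

        # Assumes the "fork" start method (not available on Windows); with "spawn"
        # the freshly created workers would not see the warmed caches.
        with get_context("fork").Pool(2) as pool:
            results = pool.map(calibrate_one, range(len(MODELS)))
        print(results)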