From ebc915a664016650414fa3d55c3e241fb35b5d2f Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 27 Jan 2018 19:13:13 -0500
Subject: [PATCH] memory usage hack

---
 mhcflurry/class1_affinity_predictor.py        | 75 ++-----------------
 .../train_allele_specific_models_command.py   |  8 +-
 2 files changed, 9 insertions(+), 74 deletions(-)

diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 10ac0e55..aee7d2e5 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -584,76 +584,6 @@ class Class1AffinityPredictor(object):
 
         return models
 
-    def calibrate_percentile_ranks(
-            self,
-            peptides=None,
-            num_peptides_per_length=int(1e5),
-            alleles=None,
-            bins=None,
-            quiet=False,
-            worker_pool=None):
-        """
-        Compute the cumulative distribution of ic50 values for a set of alleles
-        over a large universe of random peptides, to enable computing quantiles in
-        this distribution later.
-
-        Parameters
-        ----------
-        peptides : sequence of string, optional
-            Peptides to use
-        num_peptides_per_length : int, optional
-            If peptides argument is not specified, then num_peptides_per_length
-            peptides are randomly sampled from a uniform distribution for each
-            supported length
-        alleles : sequence of string, optional
-            Alleles to perform calibration for. If not specified all supported
-            alleles will be calibrated.
-        bins : object
-            Anything that can be passed to numpy.histogram's "bins" argument
-            can be used here, i.e. either an integer or a sequence giving bin
-            edges. This is in ic50 space.
-        quiet : boolean
-            If False (default), status updates will be printed to stdout.
-        """
-        if bins is None:
-            bins = to_ic50(numpy.linspace(1, 0, 1000))
-
-        if alleles is None:
-            alleles = self.supported_alleles
-
-        if peptides is None:
-            peptides = []
-            lengths = range(
-                self.supported_peptide_lengths[0],
-                self.supported_peptide_lengths[1] + 1)
-            for length in lengths:
-                peptides.extend(
-                    random_peptides(num_peptides_per_length, length))
-
-
-
-        if quiet:
-            def msg(s):
-                pass
-        else:
-            def msg(s):
-                print(s)
-                sys.stdout.flush()
-
-        encoded_peptides = EncodableSequences.create(peptides)
-        for (i, allele) in enumerate(alleles):
-            msg("Calibrating percentile ranks for allele %03d/%03d: %s" % (
-                i + 1, len(alleles), allele))
-            start = time.time()
-            predictions = self.predict(encoded_peptides, allele=allele)
-            msg("Generated %d predictions in %0.2f sec." % (
-                len(predictions), time.time() - start))
-            transform = PercentRankTransform()
-            transform.fit(predictions, bins=bins)
-            self.allele_to_percent_rank_transform[allele] = transform
-            msg("Done calibrating allele %s in %0.2f sec." % (
-                allele, time.time() - start))
-
     def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True):
         """
         Return percentile ranks for the given ic50 affinities and alleles.
@@ -1003,6 +933,11 @@ class Class1AffinityPredictor(object):
 
         if worker_pool and len(alleles) > 1:
             # Run in parallel
+
+            # Performance hack.
+            for network in self.neural_networks:
+                network.peptides_to_network_input(encoded_peptides)
+
             do_work = partial(
                 _calibrate_percentile_ranks,
                 predictor=self,
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index 82763d6e..dfd77d4c 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -223,8 +223,8 @@ def run(argv=sys.argv[1:]):
 
     print("*" * 30)
     training_time = time.time() - start
-    print("Trained affinity predictor with %d networks in %0.2f sec." % (
-        len(predictor.neural_networks), training_time))
+    print("Trained affinity predictor with %d networks in %0.2f min." % (
+        len(predictor.neural_networks), training_time / 60.0))
     print("*" * 30)
 
     if args.percent_rank_calibration_num_peptides_per_length > 0:
@@ -242,8 +242,8 @@ def run(argv=sys.argv[1:]):
         worker_pool.close()
         worker_pool.join()
 
-    print("Train time: %0.2f sec. Percent rank calibration time: %0.2f sec." % (
-        training_time, percent_rank_calibration_time))
+    print("Train time: %0.2f min. Percent rank calibration time: %0.2f min." % (
+        training_time / 60.0, percent_rank_calibration_time / 60.0))
     print("Predictor written to: %s" % args.out_models_dir)
 
 
-- 
GitLab