From 4241da44fd68279b2483a3d1fb86c38b54da39dd Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Tue, 27 Aug 2019 14:34:02 -0400
Subject: [PATCH] Add --prediction-batch-size to percentile rank calibration

---
 .../GENERATE.WITH_HPC_CLUSTER.sh              |  1 +
 .../cluster_submit_script_header.mssm_hpc.lsf |  1 +
 .../calibrate_percentile_ranks_command.py     | 21 ++++++++++++++++---
 mhcflurry/class1_affinity_predictor.py        |  6 ++++--
 4 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/downloads-generation/models_class1_pan/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/models_class1_pan/GENERATE.WITH_HPC_CLUSTER.sh
index 7a31a652..84e040cd 100755
--- a/downloads-generation/models_class1_pan/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/models_class1_pan/GENERATE.WITH_HPC_CLUSTER.sh
@@ -74,6 +74,7 @@ do
         --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
         --verbosity 1 \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+        --prediction-batch-size 524288 \
         --cluster-parallelism \
         --cluster-submit-command bsub \
         --cluster-results-workdir ~/mhcflurry-scratch \
diff --git a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
index 4d334e0d..6fb96c1f 100644
--- a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
+++ b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
@@ -18,6 +18,7 @@ export PYTHONUNBUFFERED=1
 export KMP_SETTINGS=1
 
 set -e
+set -x
 free -m
 
 module add cuda/10.0.130 cudnn/7.1.1
diff --git a/mhcflurry/calibrate_percentile_ranks_command.py b/mhcflurry/calibrate_percentile_ranks_command.py
index e06d0bfd..31aeccc1 100644
--- a/mhcflurry/calibrate_percentile_ranks_command.py
+++ b/mhcflurry/calibrate_percentile_ranks_command.py
@@ -80,6 +80,11 @@ parser.add_argument(
     nargs=2,
     help="Min and max peptide length to calibrate, inclusive. "
     "Default: %(default)s")
+parser.add_argument(
+    "--prediction-batch-size",
+    type=int,
+    default=4096,
+    help="Keras batch size for predictions")
 parser.add_argument(
     "--verbosity",
     type=int,
@@ -149,7 +154,10 @@ def run(argv=sys.argv[1:]):
     GLOBAL_DATA["args"] = {
         'motif_summary': args.motif_summary,
         'summary_top_peptide_fractions': args.summary_top_peptide_fraction,
-        'verbose': args.verbosity > 0
+        'verbose': args.verbosity > 0,
+        'model_kwargs': {
+            'batch_size': args.prediction_batch_size,
+        }
     }
     del encoded_peptides
 
@@ -222,13 +230,20 @@ def calibrate_percentile_ranks(
         peptides=None,
         motif_summary=False,
         summary_top_peptide_fractions=[0.001],
-        verbose=False):
+        verbose=False,
+        model_kwargs={}):
+    if verbose:
+        print("Calibrating", allele)
+    start = time.time()
     summary_results = predictor.calibrate_percentile_ranks(
         peptides=peptides,
         alleles=[allele],
         motif_summary=motif_summary,
         summary_top_peptide_fractions=summary_top_peptide_fractions,
-        verbose=verbose)
+        verbose=verbose,
+        model_kwargs=model_kwargs)
+    if verbose:
+        print("Done calibrating", allele, "in", time.time() - start, "sec")
     transforms = {
         allele: predictor.allele_to_percent_rank_transform[allele],
     }
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 241560d9..b3a54bfe 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -1157,7 +1157,8 @@ class Class1AffinityPredictor(object):
             bins=None,
             motif_summary=False,
             summary_top_peptide_fractions=[0.001],
-            verbose=False):
+            verbose=False,
+            model_kwargs={}):
         """
         Compute the cumulative distribution of ic50 values for a set of alleles
         over a large universe of random peptides, to enable computing quantiles in
@@ -1208,7 +1209,8 @@ class Class1AffinityPredictor(object):
             length_distributions = None
         for (i, allele) in enumerate(alleles):
             start = time.time()
-            predictions = self.predict(encoded_peptides, allele=allele)
+            predictions = self.predict(
+                encoded_peptides, allele=allele, model_kwargs=model_kwargs)
             if verbose:
                 elapsed = time.time() - start
                 print(
-- 
GitLab