From e05a7f638a894ed3da6a497f09b746fb51771125 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Wed, 31 Jul 2019 17:10:51 -0400
Subject: [PATCH] fix

---
 downloads-generation/models_class1_pan/GENERATE.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh
index 2c2dd104..e8164043 100755
--- a/downloads-generation/models_class1_pan/GENERATE.sh
+++ b/downloads-generation/models_class1_pan/GENERATE.sh
@@ -54,11 +54,15 @@ do
         --num-jobs 0 \
         --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
 
+    # For now we calibrate percentile ranks only for alleles that appear in
+    # the training data, because calibrating all alleles would be too slow.
+    # TODO: find a faster approach so that all alleles can be calibrated.
     time mhcflurry-calibrate-percentile-ranks \
         --models-dir models.${kind} \
         --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \
         --motif-summary \
         --num-peptides-per-length 100000 \
+        --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
         --verbosity 1 \
         --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
 done
-- 
GitLab