From e05a7f638a894ed3da6a497f09b746fb51771125 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Wed, 31 Jul 2019 17:10:51 -0400 Subject: [PATCH] fix --- downloads-generation/models_class1_pan/GENERATE.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh index 2c2dd104..e8164043 100755 --- a/downloads-generation/models_class1_pan/GENERATE.sh +++ b/downloads-generation/models_class1_pan/GENERATE.sh @@ -54,11 +54,15 @@ do --num-jobs 0 \ --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 + # For now we calibrate percentile ranks only for alleles for which there + # is training data. Calibrating all alleles would be too slow. + # This could be improved though. time mhcflurry-calibrate-percentile-ranks \ --models-dir models.${kind} \ --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \ --motif-summary \ --num-peptides-per-length 100000 \ + --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \ --verbosity 1 \ --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 done -- GitLab