diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh index 2c2dd104275e2351407fc60134dcc04926f4e28d..e8164043f79367382a0b62f4928e927fab072b31 100755 --- a/downloads-generation/models_class1_pan/GENERATE.sh +++ b/downloads-generation/models_class1_pan/GENERATE.sh @@ -54,11 +54,15 @@ do --num-jobs 0 \ --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 + # For now we calibrate percentile ranks only for alleles for which there + # is training data. Calibrating all alleles would be too slow. + # This could be improved though. time mhcflurry-calibrate-percentile-ranks \ --models-dir models.${kind} \ --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \ --motif-summary \ --num-peptides-per-length 100000 \ + --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \ --verbosity 1 \ --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 done