From 67df0dac805f746148bcb0b81a9690aeb4d54571 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 25 Jan 2020 17:39:35 -0500
Subject: [PATCH] fixes

---
Reviewer notes (free-form text placed after the three-dash line; it is not
part of the commit message):

  * What the patch does: deletes the one-line GENERATE.WITH_HPC_CLUSTER.sh
    wrapper and the empty predict.py, and rewrites the benchmark prediction
    steps of GENERATE.sh to call `mhcflurry-predict` directly. Each model
    writes its columns under its own --prediction-column-prefix, and the
    pan-variant models are run in a `for variant in ...` loop that reads and
    rewrites benchmark.multiallelic.predictions1.csv.
  * Review fix: in the third hunk the `--no-flanking` run reused the prefix
    `presentation_with_flanks_`, copied from the with-flanks step. It now
    uses `presentation_without_flanks_`, matching the column name of the
    `--prediction-col presentation_without_flanks` line it replaces.
  * NOTE(review): `cp $SCRIPT_DIR/predict.py .` is still a context line in
    the second hunk although this patch deletes predict.py. If GENERATE.sh
    aborts on command failure, that copy will stop the run - confirm and
    drop the line in a follow-up.
  * NOTE(review): line breaks and indentation were reconstructed from a
    whitespace-collapsed copy of this patch; hunk line counts match the
    headers, but exact leading whitespace should be checked against the
    target file. The `index` blob id for GENERATE.sh was not recomputed
    after the review fix.

 .../GENERATE.WITH_HPC_CLUSTER.sh              |  1 -
 .../data_evaluation/GENERATE.sh               | 59 +++++++++++++------
 .../data_evaluation/predict.py                |  0
 3 files changed, 40 insertions(+), 20 deletions(-)
 delete mode 100755 downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
 delete mode 100644 downloads-generation/data_evaluation/predict.py

diff --git a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
deleted file mode 100755
index 53125eb7..00000000
--- a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
+++ /dev/null
@@ -1 +0,0 @@
-bash GENERATE.sh cluster
diff --git a/downloads-generation/data_evaluation/GENERATE.sh b/downloads-generation/data_evaluation/GENERATE.sh
index 2d904d9a..9a8c336e 100755
--- a/downloads-generation/data_evaluation/GENERATE.sh
+++ b/downloads-generation/data_evaluation/GENERATE.sh
@@ -96,8 +96,10 @@ else
     time mhcflurry-predict \
         benchmark.monoallelic.csv.bz2 \
         --allele-column hla \
-        --prediction-column-prefix prediction.no_additional_ms \
+        --prediction-column-prefix no_additional_ms_ \
         --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms" \
+        --affinity-only \
+        --no-affinity-percentile \
         --out benchmark.monoallelic.predictions.csv \
         --no-throw
     bzip2 -f benchmark.monoallelic.predictions.csv
@@ -126,27 +128,45 @@ then
     echo "Reusing existing multiallelic predictions"
 else
     cp $SCRIPT_DIR/predict.py .
-    time python predict.py \
-        benchmark.multiallelic.csv \
-        --models \
-        "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
-        "$(mhcflurry-downloads path models_class1_pan_variants)/models.*" \
-        --out "$(pwd)/benchmark.multiallelic.predictions.csv"
+    time mhcflurry-predict \
+        benchmark.multiallelic.csv.bz2 \
+        --allele-column hla \
+        --prediction-column-prefix mhcflurry_production_ \
+        --models "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
+        --affinity-only \
+        --no-affinity-percentile \
+        --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
+
+    for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain
+    do
+        time mhcflurry-predict \
+            "$(pwd)/benchmark.multiallelic.predictions1.csv" \
+            --allele-column hla \
+            --prediction-column-prefix "${variant}_" \
+            --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.$variant" \
+            --affinity-only \
+            --no-affinity-percentile \
+            --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
+    done
+
     bzip2 -f benchmark.multiallelic.predictions1.csv
     rm -f benchmark.multiallelic.predictions2.csv.bz2
 fi
 
+
 ### PRESENTATION: WITH FLANKS
 if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions2.csv.bz2" ]
 then
     echo "Reusing existing multiallelic predictions2"
 else
-    mhcflurry-predict-presentation \
-        "$(pwd)/benchmark.multiallelic.predictions1.csv" \
-        --out "$(pwd)/benchmark.multiallelic.predictions2.csv" \
+    time mhcflurry-predict \
+        "$(pwd)/benchmark.multiallelic.predictions1.csv.bz2" \
+        --allele-column hla \
+        --prediction-column-prefix presentation_with_flanks_ \
         --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
-        --include-details \
-        --prediction-col presentation_with_flanks \
+        --no-affinity-percentile \
+        --out "$(pwd)/benchmark.multiallelic.predictions2.csv"
+
     bzip2 -f benchmark.multiallelic.predictions2.csv
     rm -f benchmark.multiallelic.predictions3.csv.bz2
 fi
@@ -156,18 +176,19 @@ if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions
 then
     echo "Reusing existing multiallelic predictions3"
 else
-    mhcflurry-predict-presentation \
-        "$(pwd)/benchmark.multiallelic.predictions2.csv" \
-        --out "$(pwd)/benchmark.multiallelic.predictions3.csv" \
+    time mhcflurry-predict \
+        "$(pwd)/benchmark.multiallelic.predictions2.csv.bz2" \
+        --allele-column hla \
+        --prediction-column-prefix presentation_without_flanks_ \
         --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
-        --include-details \
-        --prediction-col presentation_without_flanks \
-        --no-flanks
+        --no-affinity-percentile \
+        --no-flanking \
+        --out "$(pwd)/benchmark.multiallelic.predictions3.csv"
+
     bzip2 -f benchmark.multiallelic.predictions3.csv
 fi
 
 
-
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 -f "$LOG"
 for i in $(ls LOG-worker.*.txt) ; do bzip2 -f $i ; done
diff --git a/downloads-generation/data_evaluation/predict.py b/downloads-generation/data_evaluation/predict.py
deleted file mode 100644
index e69de29b..00000000
-- 
GitLab