From 67df0dac805f746148bcb0b81a9690aeb4d54571 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 25 Jan 2020 17:39:35 -0500
Subject: [PATCH] data_evaluation: generate benchmark predictions with mhcflurry-predict

---
 .../GENERATE.WITH_HPC_CLUSTER.sh              |  1 -
 .../data_evaluation/GENERATE.sh               | 59 +++++++++++++------
 .../data_evaluation/predict.py                |  0
 3 files changed, 40 insertions(+), 20 deletions(-)
 delete mode 100755 downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
 delete mode 100644 downloads-generation/data_evaluation/predict.py

diff --git a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
deleted file mode 100755
index 53125eb7..00000000
--- a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh
+++ /dev/null
@@ -1 +0,0 @@
-bash GENERATE.sh cluster
diff --git a/downloads-generation/data_evaluation/GENERATE.sh b/downloads-generation/data_evaluation/GENERATE.sh
index 2d904d9a..9a8c336e 100755
--- a/downloads-generation/data_evaluation/GENERATE.sh
+++ b/downloads-generation/data_evaluation/GENERATE.sh
@@ -96,8 +96,10 @@ else
     time mhcflurry-predict \
         benchmark.monoallelic.csv.bz2 \
         --allele-column hla \
-        --prediction-column-prefix prediction.no_additional_ms \
+        --prediction-column-prefix no_additional_ms_ \
         --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms" \
+        --affinity-only \
+        --no-affinity-percentile \
         --out benchmark.monoallelic.predictions.csv \
         --no-throw
     bzip2 -f benchmark.monoallelic.predictions.csv
@@ -126,27 +128,45 @@ then
     echo "Reusing existing multiallelic predictions"
 else
-    cp $SCRIPT_DIR/predict.py .
-    time python predict.py \
-        benchmark.multiallelic.csv \
-        --models \
-            "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
-            "$(mhcflurry-downloads path models_class1_pan_variants)/models.*" \
-        --out "$(pwd)/benchmark.multiallelic.predictions.csv"
+    # Production models run first; the variant loop below re-reads this output file.
+    time mhcflurry-predict \
+        benchmark.multiallelic.csv.bz2 \
+        --allele-column hla \
+        --prediction-column-prefix mhcflurry_production_ \
+        --models "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
+        --affinity-only \
+        --no-affinity-percentile \
+        --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
+
+    for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain
+    do
+        time mhcflurry-predict \
+            "$(pwd)/benchmark.multiallelic.predictions1.csv" \
+            --allele-column hla \
+            --prediction-column-prefix "${variant}_" \
+            --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.$variant" \
+            --affinity-only \
+            --no-affinity-percentile \
+            --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
+    done
+
     bzip2 -f benchmark.multiallelic.predictions1.csv
     rm -f benchmark.multiallelic.predictions2.csv.bz2
 fi
 
+
 ### PRESENTATION: WITH FLANKS
 if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions2.csv.bz2" ]
 then
     echo "Reusing existing multiallelic predictions2"
 else
-    mhcflurry-predict-presentation \
-        "$(pwd)/benchmark.multiallelic.predictions1.csv" \
-        --out "$(pwd)/benchmark.multiallelic.predictions2.csv" \
+    time mhcflurry-predict \
+        "$(pwd)/benchmark.multiallelic.predictions1.csv.bz2" \
+        --allele-column hla \
+        --prediction-column-prefix presentation_with_flanks_ \
         --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
-        --include-details \
-        --prediction-col presentation_with_flanks \
+        --no-affinity-percentile \
+        --out "$(pwd)/benchmark.multiallelic.predictions2.csv"
+
     bzip2 -f benchmark.multiallelic.predictions2.csv
     rm -f benchmark.multiallelic.predictions3.csv.bz2
 fi
@@ -156,18 +176,19 @@ if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions
 then
     echo "Reusing existing multiallelic predictions3"
 else
-    mhcflurry-predict-presentation \
-        "$(pwd)/benchmark.multiallelic.predictions2.csv" \
-        --out "$(pwd)/benchmark.multiallelic.predictions3.csv" \
+    # Presentation models again, this time with --no-flanking and a distinct column prefix.
+    time mhcflurry-predict \
+        "$(pwd)/benchmark.multiallelic.predictions2.csv.bz2" \
+        --allele-column hla \
+        --prediction-column-prefix presentation_without_flanks_ \
         --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
-        --include-details \
-        --prediction-col presentation_without_flanks \
-        --no-flanks
+        --no-affinity-percentile \
+        --no-flanking \
+        --out "$(pwd)/benchmark.multiallelic.predictions3.csv"
     bzip2 -f benchmark.multiallelic.predictions3.csv
 fi
 
 
-
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 -f "$LOG"
 for i in $(ls LOG-worker.*.txt) ; do bzip2 -f $i ; done
diff --git a/downloads-generation/data_evaluation/predict.py b/downloads-generation/data_evaluation/predict.py
deleted file mode 100644
index e69de29b..00000000
-- 
GitLab