From 0805397e0e1884f1f822675acd4be271ade3c086 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sun, 6 Oct 2019 21:46:41 -0400
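Subject: [PATCH] Split netmhcpan4 predictor into netmhcpan4-ba and
 netmhcpan4-el

GENERATE.sh: process subsets in chr1-then-all order again, reuse the
locally generated chr1 predictions for the "all" subset instead of those
under $EXISTING_DATA, and run netmhcpan4 once per kind (el, ba), writing
to predictions/${subset}.netmhcpan4.$kind.

run_predictors.py: replace the "netmhcpan4" --predictor choice with
"netmhcpan4-ba" and "netmhcpan4-el", give each its own result columns,
and pass the matching mode ("binding_affinity" or "elution_score") to
mhctools.NetMHCpan4.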

---
 .../data_mass_spec_benchmark/GENERATE.sh      | 55 +++++++++----------
 .../run_predictors.py                         | 16 ++++--
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
index f42013b3..fbb1db71 100755
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
@@ -98,8 +98,7 @@ fi
 # Write out and process peptides.
 # First just chr1 peptides, then all peptides.
 # TODO: switch this back
-#for subset in chr1 all
-for subset in all chr1
+for subset in chr1 all
 do
     if [ "$2" == "reuse-all" ]
     then
@@ -120,30 +119,32 @@ do
     fi
 
     # Run netmhcpan4
-    OUT_DIR=predictions/${subset}.netmhcpan4
-    REUSE1=""
-    REUSE2=""
-    if [ "$subset" == "all" ]
-    then
-        #REUSE1="predictions/chr1.netmhcpan4"
-        # TODO: switch this back
-        REUSE1="$EXISTING_DATA"/predictions/chr1.netmhcpan4
-    fi
-    if [ "${2:-reuse-none}" != "reuse-none" ]
-    then
-        REUSE2="$EXISTING_DATA"/$OUT_DIR
-    fi
+    for kind in el ba
+    do
+        OUT_DIR=predictions/${subset}.netmhcpan4.$kind
+        REUSE1=""
+        REUSE2=""
+        if [ "$subset" == "all" ]
+        then
+            REUSE1="predictions/chr1.netmhcpan4.$kind"
+        fi
+        if [ "${2:-reuse-none}" != "reuse-none" ]
+        then
+            REUSE2="$EXISTING_DATA"/$OUT_DIR
+        fi
+
+        python run_predictors.py \
+            proteome_peptides.$subset.csv.bz2 \
+            --result-dtype "float16" \
+            --predictor netmhcpan4-$kind \
+            --chunk-size 10000 \
+            --allele $(cat alleles.txt) \
+            --out "$OUT_DIR" \
+            --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+            --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
+            --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
+    done
 
-    python run_predictors.py \
-        proteome_peptides.$subset.csv.bz2 \
-        --result-dtype "float16" \
-        --predictor netmhcpan4 \
-        --chunk-size 10000 \
-        --allele $(cat alleles.txt) \
-        --out "$OUT_DIR" \
-        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
-        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
-        --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
 
     # Run MHCflurry
     for kind in with_mass_spec no_mass_spec
@@ -153,9 +154,7 @@ do
         REUSE2=""
         if [ "$subset" == "all" ]
         then
-            #REUSE1="predictions/chr1.mhcflurry.${kind}"
-            # TODO: switch this back
-            REUSE1="$EXISTING_DATA"/predictions/chr1.mhcflurry.${kind}
+            REUSE1="predictions/chr1.mhcflurry.${kind}"
         fi
         if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ]
         then
diff --git a/downloads-generation/data_mass_spec_benchmark/run_predictors.py b/downloads-generation/data_mass_spec_benchmark/run_predictors.py
index d4324737..be034ba9 100644
--- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py
+++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py
@@ -43,7 +43,7 @@ parser.add_argument(
 parser.add_argument(
     "--predictor",
     required=True,
-    choices=("mhcflurry", "netmhcpan4"))
+    choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el"))
 parser.add_argument(
     "--mhcflurry-models-dir",
     metavar="DIR",
@@ -88,7 +88,8 @@ add_cluster_parallelism_args(parser)
 
 PREDICTOR_TO_COLS = {
     "mhcflurry": ["affinity"],
-    "netmhcpan4": ["affinity", "percentile_rank", "elution_score"],
+    "netmhcpan4-ba": ["affinity", "percentile_rank"],
+    "netmhcpan4-el": ["elution_score"],
 }
 
 
@@ -376,9 +377,16 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None):
         result = {}
         results.append((work_item_num, result))
 
-        if predictor_name == "netmhcpan4":
+        if predictor_name == "netmhcpan4-ba":
             predictor = mhctools.NetMHCpan4(
-                alleles=alleles, program_name="netMHCpan-4.0")
+                alleles=alleles,
+                program_name="netMHCpan-4.0",
+                mode="binding_affinity")
+        elif predictor_name == "netmhcpan4-el":
+            predictor = mhctools.NetMHCpan4(
+                alleles=alleles,
+                program_name="netMHCpan-4.0",
+                mode="elution_score")
         else:
             raise ValueError("Unsupported", predictor_name)
 
-- 
GitLab