Add mixmhcpred to mass spec benchmark

fb4bb8a2 · Tim O'Donnell · 3136d2f1 · fb4bb8a2 · fb4bb8a2
Commit fb4bb8a2 authored 5 years ago by Tim O'Donnell
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
@@ -96,9 +96,7 @@ else
 fi
 # Write out and process peptides.
-# First just chr1 peptides, then all peptides.
+for subset in all
-# TODO: switch this back
-for subset in chr1 all
 do
    if [ "$2" == "reuse-all" ]
    then
@@ -118,19 +116,33 @@ do
        bzip2 proteome_peptides.$subset.csv
    fi
+    # Run mixmhcpred; --reuse-predictions points into $EXISTING_DATA unless $2 is unset or "reuse-none".
+    OUT_DIR=predictions/${subset}.mixmhcpred
+    REUSE=""
+    if [ "${2:-reuse-none}" != "reuse-none" ]
+    then
+        REUSE="$EXISTING_DATA"/$OUT_DIR
+    fi
+    python run_predictors.py \
+        proteome_peptides.$subset.csv.bz2 \
+        --result-dtype "float16" \
+        --predictor mixmhcpred \
+        --chunk-size 500000 \
+        --allele $(cat alleles.txt) \
+        --out "$OUT_DIR" \
+        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+        --cluster-script-prefix-path "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf" \
+        --reuse-predictions "$REUSE" $EXTRA_ARGS
    # Run netmhcpan4
    for kind in el ba
    do
        OUT_DIR=predictions/${subset}.netmhcpan4.$kind
-        REUSE1=""
+        REUSE=""
-        REUSE2=""
-        if [ "$subset" == "all" ]
-        then
-            REUSE1="predictions/chr1.netmhcpan4.$kind"
-        fi
        if [ "${2:-reuse-none}" != "reuse-none" ]
        then
-            REUSE2="$EXISTING_DATA"/$OUT_DIR
+            REUSE="$EXISTING_DATA"/$OUT_DIR
        fi
        python run_predictors.py \
@@ -138,11 +150,11 @@ do
            --result-dtype "float16" \
            --predictor netmhcpan4-$kind \
            --chunk-size 1000 \
-            --allele $(cat alleles.txt | grep -v '31:0102') \
+            --allele $(cat alleles.txt) \
            --out "$OUT_DIR" \
            --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
            --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
-            --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
+            --reuse-predictions "$REUSE" $EXTRA_ARGS
    done
@@ -150,15 +162,10 @@ do
    for kind in combined
    do
        OUT_DIR=predictions/${subset}.mhcflurry.${kind}
-        REUSE1=""
+        REUSE=""
-        REUSE2=""
-        if [ "$subset" == "all" ]
-        then
-            REUSE1="predictions/chr1.mhcflurry.${kind}"
-        fi
        if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ]
        then
-            REUSE2="$EXISTING_DATA"/$OUT_DIR
+            REUSE="$EXISTING_DATA"/$OUT_DIR
        fi
        python run_predictors.py \
@@ -172,7 +179,7 @@ do
            --out "$OUT_DIR" \
            --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
            --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.gpu.lsf \
-            --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
+            --reuse-predictions "$REUSE" $EXTRA_ARGS
    done
 done

--- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py
+++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py
@@ -43,7 +43,7 @@ parser.add_argument(
 parser.add_argument(
    "--predictor",
    required=True,
-    choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el"))
+    choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el", "mixmhcpred"))
 parser.add_argument(
    "--mhcflurry-models-dir",
    metavar="DIR",
@@ -90,6 +90,7 @@ PREDICTOR_TO_COLS = {
    "mhcflurry": ["affinity"],
    "netmhcpan4-ba": ["affinity", "percentile_rank"],
    "netmhcpan4-el": ["elution_score"],
+    "mixmhcpred": ["elution_score"],
 }
@@ -392,6 +393,10 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None):
                alleles=alleles,
                program_name="netMHCpan-4.0",
                mode="elution_score")
+        elif predictor_name == "mixmhcpred":
+            # Do not pass the netMHCpan-specific program_name/mode here.
+            # NOTE(review): assumes mhctools.MixMHCpred defaults to its own executable - confirm.
+            predictor = mhctools.MixMHCpred(alleles=alleles)
        else:
            raise ValueError("Unsupported", predictor_name)