From a583028b00f3568fb0fbf671fecb963e57eff194 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 25 Jan 2020 21:16:17 -0500
Subject: [PATCH] update

---
 .../data_evaluation/GENERATE.sh               | 158 ++++++++++--------
 .../cluster_submit_script_header.mssm_hpc.lsf |  42 +++++
 2 files changed, 127 insertions(+), 73 deletions(-)
 create mode 100644 downloads-generation/data_evaluation/cluster_submit_script_header.mssm_hpc.lsf

diff --git a/downloads-generation/data_evaluation/GENERATE.sh b/downloads-generation/data_evaluation/GENERATE.sh
index c2040ff3..50f90f99 100755
--- a/downloads-generation/data_evaluation/GENERATE.sh
+++ b/downloads-generation/data_evaluation/GENERATE.sh
@@ -14,25 +14,6 @@ SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
 SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
 
-if [ "$1" != "cluster" ]
-then
-    GPUS=$(nvidia-smi -L 2> /dev/null | wc -l) || GPUS=0
-    echo "Detected GPUS: $GPUS"
-
-    PROCESSORS=$(getconf _NPROCESSORS_ONLN)
-    echo "Detected processors: $PROCESSORS"
-
-    if [ "$GPUS" -eq "0" ]; then
-       NUM_JOBS=${NUM_JOBS-1}
-    else
-        NUM_JOBS=${NUM_JOBS-$GPUS}
-    fi
-    echo "Num jobs: $NUM_JOBS"
-    PARALLELISM_ARGS+=" --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1"
-else
-    PARALLELISM_ARGS+=" --cluster-parallelism --cluster-max-retries 3 --cluster-submit-command bsub --cluster-results-workdir $HOME/mhcflurry-scratch --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf"
-fi
-
 mkdir -p "$SCRATCH_DIR"
 if [ "$2" != "continue-incomplete" ]
 then
@@ -88,24 +69,6 @@ else
     rm -f benchmark.monoallelic.predictions.csv.bz2
 fi
 
-### AFFINITY PREDICTOR VARIANT: MONOALLELIC
-if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.predictions.csv.bz2" ]
-then
-    echo "Reusing existing monoallelic benchmark predictions"
-else
-    time mhcflurry-predict \
-        benchmark.monoallelic.csv.bz2 \
-        --allele-column hla \
-        --prediction-column-prefix no_additional_ms_ \
-        --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms" \
-        --affinity-only \
-        --no-affinity-percentile \
-        --out benchmark.monoallelic.predictions.csv \
-        --no-throw
-    bzip2 -f benchmark.monoallelic.predictions.csv
-    ls -lh benchmark.monoallelic.predictions.csv.bz2
-fi
-
 ### GENERATE BENCHMARK: MULTIALLELIC
 if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.csv.bz2" ]
 then
@@ -114,79 +77,128 @@ else
     cp $SCRIPT_DIR/make_benchmark.py .
     time python make_benchmark.py \
         --hits "$(pwd)/hits_with_tpm.csv.bz2" \
-        --proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
+        --proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
         --decoys-per-hit 99 \
         --only-format MULTIALLELIC \
         --out "$(pwd)/benchmark.multiallelic.csv"
     bzip2 -f benchmark.multiallelic.csv
-    rm -f benchmark.multiallelic.predictions1.csv.bz2
 fi
 
+# Each section below appends its command(s) to a script in commands/; all run at the end.
+rm -rf commands
+mkdir commands
+
+### AFFINITY PREDICTOR VARIANT: MONOALLELIC
+if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.predictions.csv.bz2" ]
+then
+    echo "Reusing existing monoallelic benchmark predictions"
+else
+    # \"...\" puts literal quotes in the generated script so each path stays one word.
+    echo time mhcflurry-predict \
+        \""$(pwd)/benchmark.monoallelic.csv.bz2"\" \
+        --allele-column hla \
+        --prediction-column-prefix no_additional_ms_ \
+        --models \""$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms"\" \
+        --affinity-only \
+        --no-affinity-percentile \
+        --out \""$(pwd)/benchmark.monoallelic.predictions.csv"\" \
+        --no-throw >> commands/monoallelic.sh
+    echo bzip2 -f \""$(pwd)/benchmark.monoallelic.predictions.csv"\" >> commands/monoallelic.sh
+fi
+
 ### AFFINITY PREDICTORS: MULTIALLELIC
-if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions1.csv.bz2" ]
+if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.production.csv.bz2" ]
 then
     echo "Reusing existing multiallelic predictions"
 else
-    time mhcflurry-predict \
-        benchmark.multiallelic.csv.bz2 \
+    echo time mhcflurry-predict \
+        \""$(pwd)/benchmark.multiallelic.csv.bz2"\" \
         --allele-column hla \
         --prediction-column-prefix mhcflurry_production_ \
-        --models "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
+        --models \""$(mhcflurry-downloads path models_class1_pan)/models.combined"\" \
         --affinity-only \
         --no-affinity-percentile \
-        --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
+        --out \""$(pwd)/benchmark.multiallelic.production.csv"\" >> commands/multiallelic.production.sh
+    echo bzip2 -f \""$(pwd)/benchmark.multiallelic.production.csv"\" >> commands/multiallelic.production.sh
+fi  # nothing ran here: the queued script is executed by the dispatch step at the end
 
-    for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain
-    do
-        time mhcflurry-predict \
-            "$(pwd)/benchmark.multiallelic.predictions1.csv" \
+for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain
+do  # each variant gets its own command script and its own output file
+    if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.${variant}.csv.bz2" ]
+    then
+        echo "Reusing existing multiallelic predictions: ${variant}"
+    else  # queue only; the dispatch step at the end of the script runs it
+        echo time mhcflurry-predict \
+            \""$(pwd)/benchmark.multiallelic.csv.bz2"\" \
             --allele-column hla \
             --prediction-column-prefix "${variant}_" \
-            --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.$variant" \
+            --models \""$(mhcflurry-downloads path models_class1_pan_variants)/models.$variant"\" \
             --affinity-only \
             --no-affinity-percentile \
-            --out "$(pwd)/benchmark.multiallelic.predictions1.csv"
-    done
-
-    bzip2 -f benchmark.multiallelic.predictions1.csv
-    rm -f benchmark.multiallelic.predictions2.csv.bz2
-fi
+            --out \""$(pwd)/benchmark.multiallelic.${variant}.csv"\" >> "commands/multiallelic.${variant}.sh"
+        echo bzip2 -f \""$(pwd)/benchmark.multiallelic.${variant}.csv"\" >> "commands/multiallelic.${variant}.sh"
+    fi
+done
 
 
 ### PRESENTATION: WITH FLANKS
-if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions2.csv.bz2" ]
+if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.presentation_with_flanks.csv.bz2" ]
 then
-    echo "Reusing existing multiallelic predictions2"
+    echo "Reusing existing multiallelic presentation with flanks"  # compressed output already exists
 else
-    time mhcflurry-predict \
-        "$(pwd)/benchmark.multiallelic.predictions1.csv.bz2" \
+    echo time mhcflurry-predict \
+        \""$(pwd)/benchmark.multiallelic.csv.bz2"\" \
         --allele-column hla \
         --prediction-column-prefix presentation_with_flanks_ \
-        --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
+        --models \""$(mhcflurry-downloads path models_class1_presentation)/models"\" \
         --no-affinity-percentile \
-        --out "$(pwd)/benchmark.multiallelic.predictions2.csv"
-
-    bzip2 -f benchmark.multiallelic.predictions2.csv
-    rm -f benchmark.multiallelic.predictions3.csv.bz2
+        --out \""$(pwd)/benchmark.multiallelic.presentation_with_flanks.csv"\" >> commands/multiallelic.presentation_with_flanks.sh
+    echo bzip2 -f \""$(pwd)/benchmark.multiallelic.presentation_with_flanks.csv"\" >> commands/multiallelic.presentation_with_flanks.sh
 fi
 
 ### PRESENTATION: NO FLANKS
-if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions3.csv.bz2" ]
+if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.presentation_without_flanks.csv.bz2" ]
 then
-    echo "Reusing existing multiallelic predictions3"
+    echo "Reusing existing multiallelic presentation without flanks"  # compressed output already exists
 else
-    time mhcflurry-predict \
-        "$(pwd)/benchmark.multiallelic.predictions2.csv.bz2" \
+    echo time mhcflurry-predict \
+        \""$(pwd)/benchmark.multiallelic.csv.bz2"\" \
         --allele-column hla \
-        --prediction-column-prefix presentation_with_flanks_ \
-        --models "$(mhcflurry-downloads path models_class1_presentation)/models" \
+        --prediction-column-prefix presentation_without_flanks_ \
+        --models \""$(mhcflurry-downloads path models_class1_presentation)/models"\" \
         --no-affinity-percentile \
         --no-flanking \
-        --out "$(pwd)/benchmark.multiallelic.predictions3.csv"
-
-    bzip2 -f benchmark.multiallelic.predictions3.csv
+        --out \""$(pwd)/benchmark.multiallelic.presentation_without_flanks.csv"\" >> commands/multiallelic.presentation_without_flanks.sh
+    echo bzip2 -f \""$(pwd)/benchmark.multiallelic.presentation_without_flanks.csv"\" >> commands/multiallelic.presentation_without_flanks.sh
 fi
 
+ls -lh commands
+
+if [ "$1" != "cluster" ]
+then
+    echo "Running locally"
+    for i in $(ls commands/*.sh)
+    do
+        echo "# *******"
+        echo "# Command $i"
+        cat "$i"
+        bash "$i"
+    done
+else
+    echo "Running on cluster"
+    for i in $(ls commands/*.sh)
+    do
+        echo "# *******"
+        echo "# Command $i"
+        cat "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf" > "${i}.lsf"
+        echo cd \""$(pwd)"\" >> "${i}.lsf"
+        cat "$i" >> "${i}.lsf"
+        cat "${i}.lsf"
+        # bsub -K blocks until the job ends; '&' plus 'wait' runs the jobs in parallel.
+        bsub -K < "${i}.lsf" &
+    done
+    wait
+fi
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 -f "$LOG"
diff --git a/downloads-generation/data_evaluation/cluster_submit_script_header.mssm_hpc.lsf b/downloads-generation/data_evaluation/cluster_submit_script_header.mssm_hpc.lsf
new file mode 100644
index 00000000..a0783f2e
--- /dev/null
+++ b/downloads-generation/data_evaluation/cluster_submit_script_header.mssm_hpc.lsf
@@ -0,0 +1,42 @@
+#!/bin/bash
+#BSUB -J MHCf # Job name
+#BSUB -P acc_nkcancer # allocation account or Unix group
+#BSUB -q gpu # queue
+#BSUB -R rusage[ngpus_excl_p=1]  # 1 exclusive GPU
+#BSUB -R span[hosts=1] # one node
+#BSUB -n 1 # number of compute cores
+#BSUB -W 10:00 # walltime in HH:MM
+#BSUB -R rusage[mem=20000] # mb memory requested
+#BSUB -o %J.stdout # output log (%J : JobID)
+#BSUB -eo %J.stderr # error log
+#BSUB -L /bin/bash # Initialize the execution environment
+#
+# Header only: GENERATE.sh appends a `cd` line and the job's commands after this file.
+set -e
+set -x
+# Everything written to stderr after the next two lines goes to stdout instead.
+echo "Subsequent stderr output redirected to stdout" >&2
+exec 2>&1
+# Scratch dir, the py36b conda env's bin dir first on PATH, unbuffered Python output.
+export TMPDIR=/local/JOBS/mhcflurry
+export PATH=$HOME/.conda/envs/py36b/bin/:$PATH
+export PYTHONUNBUFFERED=1
+export KMP_SETTINGS=1
+# Record memory on the execution host in the job log.
+free -m
+# Load the CUDA module, then list loaded modules in the job log.
+module add cuda/10.0.130
+module list
+# Prepend the cuDNN copy under CUDNN_HOME to the library and include search paths.
+export CUDNN_HOME=/hpc/users/odonnt02/oss/cudnn/cuda
+export LD_LIBRARY_PATH=$CUDNN_HOME/lib64:$LD_LIBRARY_PATH
+export CMAKE_LIBRARY_PATH=$CUDNN_HOME/lib64:$CMAKE_LIBRARY_PATH
+export INCLUDE_PATH=$CUDNN_HOME/include:$INCLUDE_PATH
+export C_INCLUDE_PATH=$CUDNN_HOME/include:$C_INCLUDE_PATH
+export CPLUS_INCLUDE_PATH=$CUDNN_HOME/include:$CPLUS_INCLUDE_PATH
+export CMAKE_INCLUDE_PATH=$CUDNN_HOME/include:$CMAKE_INCLUDE_PATH
+# Print whether TensorFlow reports a usable GPU.
+python -c 'import tensorflow as tf ; print("GPU AVAILABLE" if tf.test.is_gpu_available() else "GPU NOT AVAILABLE")'
+# Dump the full environment into the job log.
+env
+
-- 
GitLab