From 59355ec6dd025e34148e3c15dc6f8f560a79677d Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sun, 18 Aug 2019 14:48:39 -0400
Subject: [PATCH] Support resuming incomplete pan-allele model generation runs

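Both GENERATE scripts now accept an optional "continue-incomplete"
argument. A rough sketch of the intended invocations (the relative path
and working directory are assumptions, not taken from the scripts):

    # Fresh run (default): wipe and recreate the scratch directory,
    # regenerate hyperparameters.yaml, and train from scratch.
    ./GENERATE.sh

    # Resume a partial run: keep the scratch directory, skip
    # hyperparameter regeneration, and pass --continue-incomplete to
    # mhcflurry-class1-train-pan-allele-models for each existing
    # models.${kind} directory.
    ./GENERATE.sh continue-incomplete

Also increases random_negative_constant from 25 to 1500 and adds a
timestamp to worker log filenames so logs from a resumed run do not
overwrite those from earlier attempts.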
---
 .../GENERATE.WITH_HPC_CLUSTER.sh              | 48 +++++++++----------
 .../models_class1_pan_unselected/GENERATE.sh  | 26 ++++++++--
 .../generate_hyperparameters.py               |  2 +-
 mhcflurry/local_parallelism.py                | 11 ++---
 4 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
index ec154e9a..03023a91 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
@@ -14,14 +14,21 @@ SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
 
 mkdir -p "$SCRATCH_DIR"
-rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
-mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+if [ "$1" != "continue-incomplete" ]
+then
+    echo "Fresh run"
+    rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+    mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+else
+    echo "Continuing incomplete run"
+fi
 
 # Send stdout and stderr to a logfile included with the archive.
 exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
 exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
 
 # Log some environment info
+echo "Invocation: $0 $@"
 date
 pip freeze
 git status
@@ -31,12 +38,22 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
 export OMP_NUM_THREADS=1
 export PYTHONUNBUFFERED=1
 
-cp $SCRIPT_DIR/generate_hyperparameters.py .
-python generate_hyperparameters.py > hyperparameters.yaml
+if [ "$1" != "continue-incomplete" ]
+then
+    cp $SCRIPT_DIR/generate_hyperparameters.py .
+    python generate_hyperparameters.py > hyperparameters.yaml
+fi
 
 for kind in with_mass_spec no_mass_spec
 do
-    echo mhcflurry-class1-train-pan-allele-models \
+    EXTRA_TRAIN_ARGS=""
+    if [ "$1" == "continue-incomplete" ] && [ -d "models.${kind}" ]
+    then
+        echo "Will continue existing run: $kind"
+        EXTRA_TRAIN_ARGS="--continue-incomplete"
+    fi
+
+    mhcflurry-class1-train-pan-allele-models \
         --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
         --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
@@ -50,26 +67,7 @@ do
         --cluster-submit-command bsub \
         --cluster-results-workdir ~/mhcflurry-scratch \
         --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf \
-        \\ > INITIALIZE.${kind}.sh
-
-    cp INITIALIZE.${kind}.sh PROCESS.${kind}.sh
-    echo "--only-initialize" >> INITIALIZE.${kind}.sh
-    echo "--continue-incomplete" >> PROCESS.${kind}.sh
-
-    bash INITIALIZE.${kind}.sh
-    echo "Done initializing."
-
-    bash PROCESS.${kind}.sh && touch $(pwd)/models.${kind}/COMPLETE || true
-    echo "Processing terminated."
-
-    # In case the above command fails, the job can may still be fixable manually.
-    # So we wait for the COMPLETE file here.
-    while [ ! -f models.${kind}/COMPLETE ]
-    do
-        echo "Waiting for $(pwd)/models.${kind}/COMPLETE"
-        echo "Processing script: $(pwd)/PROCESS.${kind}.sh"
-        sleep 60
-    done
+        $EXTRA_TRAIN_ARGS
 done
 
 cp $SCRIPT_ABSOLUTE_PATH .
diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
index 4be7648a..511dcae6 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
@@ -11,8 +11,14 @@ SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
 
 mkdir -p "$SCRATCH_DIR"
-rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
-mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+if [ "$1" != "continue-incomplete" ]
+then
+    echo "Fresh run"
+    rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+    mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+else
+    echo "Continuing incomplete run"
+fi
 
 # Send stdout and stderr to a logfile included with the archive.
 exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
@@ -43,8 +49,21 @@ echo "Num jobs: $NUM_JOBS"
 
 export PYTHONUNBUFFERED=1
 
+if [ "$1" != "continue-incomplete" ]
+then
+    cp $SCRIPT_DIR/generate_hyperparameters.py .
+    python generate_hyperparameters.py > hyperparameters.yaml
+fi
+
 for kind in with_mass_spec no_mass_spec
 do
+    EXTRA_TRAIN_ARGS=""
+    if [ "$1" == "continue-incomplete" ] && [ -d "models.${kind}" ]
+    then
+        echo "Will continue existing run: $kind"
+        EXTRA_TRAIN_ARGS="--continue-incomplete"
+    fi
+
     mhcflurry-class1-train-pan-allele-models \
         --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
@@ -55,7 +74,8 @@ do
         --out-models-dir models.${kind} \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
         --verbosity 0 \
-        --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
+        --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 \
+        $EXTRA_TRAIN_ARGS
 done
 
 cp $SCRIPT_ABSOLUTE_PATH .
diff --git a/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py b/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
index defa6481..8f8bf518 100644
--- a/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
+++ b/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
@@ -36,7 +36,7 @@ base_hyperparameters = {
     'peptide_dense_layer_sizes': [],
     'random_negative_affinity_max': 50000.0,
     'random_negative_affinity_min': 20000.0,
-    'random_negative_constant': 25,
+    'random_negative_constant': 1500,
     'random_negative_distribution_smoothing': 0.0,
     'random_negative_match_distribution': True,
     'random_negative_rate': 0.2,
diff --git a/mhcflurry/local_parallelism.py b/mhcflurry/local_parallelism.py
index 85806b71..b471f52a 100644
--- a/mhcflurry/local_parallelism.py
+++ b/mhcflurry/local_parallelism.py
@@ -1,7 +1,7 @@
 import traceback
 import sys
 import os
-import functools
+import time
 from multiprocessing import Pool, Queue, cpu_count
 from six.moves import queue
 from multiprocessing.util import Finalize
@@ -218,12 +218,9 @@ def worker_init_entry_point(
 
 def worker_init(keras_backend=None, gpu_device_nums=None, worker_log_dir=None):
     if worker_log_dir:
-        sys.stderr = sys.stdout = open(
-            os.path.join(worker_log_dir, "LOG-"
-                                         ""
-                                         ""
-                                         ""
-                                         "worker.%d.txt" % os.getpid()), "w")
+        sys.stderr = sys.stdout = open(os.path.join(
+            worker_log_dir,
+            "LOG-worker.%d.%d.txt" % (os.getpid(), int(time.time()))), "w")
 
     # Each worker needs distinct random numbers
     numpy.random.seed()
-- 
GitLab