From 2d6fbe464a807c80e18481685ed16813ac4fde7c Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sun, 18 Aug 2019 10:13:32 -0400
Subject: [PATCH] Add --only-initialize flag; split HPC cluster run into initialize and process steps

---
 .../GENERATE.WITH_HPC_CLUSTER.sh              | 28 +++++++++++++++++--
 mhcflurry/train_pan_allele_models_command.py  | 10 ++++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
index 9faf5e56..5f0dc984 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
@@ -36,20 +36,42 @@ python generate_hyperparameters.py > hyperparameters.yaml
 
 for kind in with_mass_spec no_mass_spec
 do
-    mhcflurry-class1-train-pan-allele-models \
+    echo mhcflurry-class1-train-pan-allele-models \
         --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
         --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
         --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
         --ensemble-size 4 \
         --hyperparameters hyperparameters.yaml \
-        --out-models-dir models.${kind} \
+        --out-models-dir $(pwd)/models.${kind} \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
         --verbosity 0 \
         --cluster-parallelism \
         --cluster-submit-command bsub \
         --cluster-results-workdir ~/mhcflurry-scratch \
-        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
+        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf \
+        \\ > INITIALIZE.${kind}.sh
+
+    cp INITIALIZE.${kind}.sh PROCESS.${kind}.sh
+    echo "--only-initialize" >> INITIALIZE.${kind}.sh
+    echo "--continue-incomplete" >> PROCESS.${kind}.sh
+
+    bash INITIALIZE.${kind}.sh
+    echo "Done initializing."
+
+    bash PROCESS.${kind}.sh && touch $(pwd)/models.${kind}/COMPLETE || true
+    echo "Processing terminated."
+
+    # In case the above command fails, the job may still be fixable manually.
+    # So we wait for the COMPLETE file here.
+    if [ ! -f models.${kind}/COMPLETE ]
+    then
+        echo "Waiting for models.${kind}/COMPLETE"
+    fi
+    while [ ! -f models.${kind}/COMPLETE ]
+    do
+        sleep 60
+    done
 done
 
 cp $SCRIPT_ABSOLUTE_PATH .
diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index 207a58f6..e61f5bae 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -121,6 +121,12 @@ parser.add_argument(
     default=False,
     help="Continue training models from an incomplete training run. If this is "
     "specified then the only required argument is --out-models-dir")
+parser.add_argument(
+    "--only-initialize",
+    action="store_true",
+    default=False,
+    help="Do not actually train models. The initialized run can be continued "
+    "later with --continue-incomplete.")
 
 add_local_parallelism_args(parser)
 add_cluster_parallelism_args(parser)
@@ -235,7 +241,9 @@ def main(args):
 
     if not args.continue_incomplete:
         initialize_training(args)
-    train_models(args)
+
+    if not args.only_initialize:
+        train_models(args)
 
 
 def initialize_training(args):
-- 
GitLab