From f2858cff2b243a46ff33631a5dfbcacac52330d0 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Wed, 12 Jun 2019 08:58:51 -0400
Subject: [PATCH] fix GENERATE.sh

---
 .../models_class1_pan/GENERATE.sh                | 14 ++++++--------
 .../generate_hyperparameters.py                  | 16 +++++++++-------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh
index 8724ed3f..ca880004 100755
--- a/downloads-generation/models_class1_pan/GENERATE.sh
+++ b/downloads-generation/models_class1_pan/GENERATE.sh
@@ -15,8 +15,8 @@ rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
 mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
 
 # Send stdout and stderr to a logfile included with the archive.
-#exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
-#exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
 
 # Log some environment info
 date
@@ -35,7 +35,7 @@ echo "Detected GPUS: $GPUS"
 PROCESSORS=$(getconf _NPROCESSORS_ONLN)
 echo "Detected processors: $PROCESSORS"
 
-time mhcflurry-class1-train-pan-allele-models \
+mhcflurry-class1-train-pan-allele-models \
     --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
     --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
     --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
@@ -43,14 +43,12 @@ time mhcflurry-class1-train-pan-allele-models \
     --ensemble-size 4 \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models-unselected \
-
-
-    #--num-jobs $(expr $PROCESSORS \* 2) --gpus $GPUS --max-workers-per-gpu 2 --max-tasks-per-worker 50
+    --num-jobs 4 --gpus $GPUS --max-workers-per-gpu 1 --max-tasks-per-worker 1
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.with_unselected.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.with_unselected.tar.bz2"
 
-ls * | grep -v models-unselected | xargs -I {} tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" {}
-echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
\ No newline at end of file
+ls -d * | grep -v models-unselected | xargs -I {} tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" {}
+echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
diff --git a/downloads-generation/models_class1_pan/generate_hyperparameters.py b/downloads-generation/models_class1_pan/generate_hyperparameters.py
index ea728dc3..72be989f 100644
--- a/downloads-generation/models_class1_pan/generate_hyperparameters.py
+++ b/downloads-generation/models_class1_pan/generate_hyperparameters.py
@@ -48,12 +48,14 @@ base_hyperparameters = {
 }
 
 grid = []
-for layer_sizes in [[512, 256], [1024, 512], [1024, 1024]]:
-    for l1 in [0.0, 0.0001, 0.001, 0.01]:
-        new = deepcopy(base_hyperparameters)
-        new["layer_sizes"] = layer_sizes
-        new["dense_layer_l1_regularization"] = l1
-        if not grid or new not in grid:
-            grid.append(new)
+for layer_sizes in [[512, 256], [512, 512],]:
+    for pretrain in [True, False]:
+        for l1 in [0.0, 0.0001, 0.001, 0.01]:
+            new = deepcopy(base_hyperparameters)
+            new["layer_sizes"] = layer_sizes
+            new["dense_layer_l1_regularization"] = l1
+            new["train_data"]["pretrain"] = pretrain
+            if not grid or new not in grid:
+                grid.append(new)
 
 dump(grid, stdout)
-- 
GitLab