From d2b44b3822fc960d959a65852a9e2eec567d1b44 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 8 Feb 2018 18:54:25 -0500
Subject: [PATCH] new models_class1_unselected download

---
 .../models_class1/generate_hyperparameters.py | 41 ++++++++++---------
 .../GENERATE.sh                               |  4 +-
 .../README.md                                 |  0
 .../generate_hyperparameters.py               |  0
 4 files changed, 23 insertions(+), 22 deletions(-)
 rename downloads-generation/{models_class1_no_mass_spec => models_class1_unselected}/GENERATE.sh (92%)
 rename downloads-generation/{models_class1_no_mass_spec => models_class1_unselected}/README.md (100%)
 rename downloads-generation/{models_class1_no_mass_spec => models_class1_unselected}/generate_hyperparameters.py (100%)

diff --git a/downloads-generation/models_class1/generate_hyperparameters.py b/downloads-generation/models_class1/generate_hyperparameters.py
index dcd48aa8..c130471f 100644
--- a/downloads-generation/models_class1/generate_hyperparameters.py
+++ b/downloads-generation/models_class1/generate_hyperparameters.py
@@ -10,7 +10,7 @@ base_hyperparameters = {
     ##########################################
     # ENSEMBLE SIZE
     ##########################################
-    "n_models": 1,
+    "n_models": 8,
 
     ##########################################
     # OPTIMIZATION
@@ -19,7 +19,7 @@ base_hyperparameters = {
     "patience": 20,
     "early_stopping": True,
     "validation_split": 0.1,
-    "minibatch_size": 128,
+    "minibatch_size": None,
     "loss": "custom:mse_with_inequalities",
 
     ##########################################
@@ -52,28 +52,31 @@ base_hyperparameters = {
     "activation": "tanh",
     "output_activation": "sigmoid",
     "layer_sizes": [16],
-    "dense_layer_l1_regularization": 0.001,
+    "dense_layer_l1_regularization": None,
     "batch_normalization": False,
     "dropout_probability": 0.0,
 }
 
 grid = []
-for dense_layer_size in [64, 32, 16]:
-    for l1 in [0.001, 0.01, 0.0]:
-        for num_lc in [0, 1, 2]:
-            for lc_kernel_size in [3, 5]:
-                new = deepcopy(base_hyperparameters)
-                new["layer_sizes"] = [dense_layer_size]
-                new["dense_layer_l1_regularization"] = l1
-                (lc_layer,) = new["locally_connected_layers"]
-                lc_layer['kernel_size'] = lc_kernel_size
-                if num_lc == 0:
-                    new["locally_connected_layers"] = []
-                elif num_lc == 1:
-                    new["locally_connected_layers"] = [lc_layer]
-                elif num_lc == 2:
-                    new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
-                if not grid or new not in grid:
-                    grid.append(new)
+for minibatch_size in [32, 128, 512]:
+    for dense_layer_size in [8, 16, 32, 64, 128]:
+        for l1 in [0.0, 0.001, 0.01, 0.1]:
+            for num_lc in [0, 1, 2]:
+                for lc_kernel_size in [3, 5]:
+                    new = deepcopy(base_hyperparameters)
+                    # Base value is None; set it here or the dedup below drops every minibatch variant.
+                    new["minibatch_size"] = minibatch_size
+                    new["layer_sizes"] = [dense_layer_size]
+                    new["dense_layer_l1_regularization"] = l1
+                    (lc_layer,) = new["locally_connected_layers"]
+                    lc_layer['kernel_size'] = lc_kernel_size
+                    if num_lc == 0:
+                        new["locally_connected_layers"] = []
+                    elif num_lc == 1:
+                        new["locally_connected_layers"] = [lc_layer]
+                    elif num_lc == 2:
+                        new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
+                    if not grid or new not in grid:
+                        grid.append(new)
 
 dump(grid, stdout)
diff --git a/downloads-generation/models_class1_no_mass_spec/GENERATE.sh b/downloads-generation/models_class1_unselected/GENERATE.sh
similarity index 92%
rename from downloads-generation/models_class1_no_mass_spec/GENERATE.sh
rename to downloads-generation/models_class1_unselected/GENERATE.sh
index 20b74978..4890da0c 100755
--- a/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
+++ b/downloads-generation/models_class1_unselected/GENERATE.sh
@@ -7,7 +7,7 @@
 set -e
 set -x
 
-DOWNLOAD_NAME=models_class1_no_mass_spec
+DOWNLOAD_NAME=models_class1_unselected
 SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
 SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
@@ -35,7 +35,7 @@ time mhcflurry-class1-train-allele-specific-models \
     --data "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models \
-    --percent-rank-calibration-num-peptides-per-length 100000 \
+    --percent-rank-calibration-num-peptides-per-length 0 \
     --min-measurements-per-allele 75 \
     --num-jobs 32 16
 
diff --git a/downloads-generation/models_class1_no_mass_spec/README.md b/downloads-generation/models_class1_unselected/README.md
similarity index 100%
rename from downloads-generation/models_class1_no_mass_spec/README.md
rename to downloads-generation/models_class1_unselected/README.md
diff --git a/downloads-generation/models_class1_no_mass_spec/generate_hyperparameters.py b/downloads-generation/models_class1_unselected/generate_hyperparameters.py
similarity index 100%
rename from downloads-generation/models_class1_no_mass_spec/generate_hyperparameters.py
rename to downloads-generation/models_class1_unselected/generate_hyperparameters.py
-- 
GitLab