From bf54628ab0214f96e0368302561674d4e0fd1be6 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 13 Jul 2019 14:53:29 -0400
Subject: [PATCH] Fix pre-training retry loop and val_loss comparison

Rebuild the model from scratch on every pre-training attempt: move the
Class1NeuralNetwork construction and the pretrain data iterator inside
the retry loop, so a failed attempt does not continue fitting the
previous attempt's weights. Record the attempt number in fit_info
under "training_info", and merge the later training_info fields in
with setdefault()/update() so that entry survives.

fit_info stores val_loss as a per-epoch list, so compare
pretrain_max_val_loss against the final epoch's value rather than
against the whole list.

Also reduce the LSF memory request from 60000 MB to 30000 MB, and set
pretrain_max_val_loss in the test hyperparameters so the retry path is
exercised.

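For reference, a minimal self-contained sketch of the retry pattern
this patch introduces (make_model, fit_one_attempt, and the constants
are illustrative stand-ins, not mhcflurry APIs):

    import random

    MAX_VAL_LOSS = 0.1    # plays the role of pretrain_max_val_loss
    MAX_ATTEMPTS = 10

    def make_model():
        # Stand-in for Class1NeuralNetwork(**hyperparameters): each
        # attempt must start from fresh, untrained weights.
        return {"fit_info": []}

    def fit_one_attempt(model):
        # Stand-in for model.fit_generator(...): appends one fit's
        # per-epoch val_loss list, mirroring fit_info's structure.
        model["fit_info"].append(
            {"val_loss": [random.random() for _ in range(3)]})

    for attempt in range(1, MAX_ATTEMPTS + 1):
        model = make_model()  # fresh model on every attempt
        fit_one_attempt(model)
        final_val_loss = model["fit_info"][-1]["val_loss"][-1]  # last epoch
        if final_val_loss < MAX_VAL_LOSS:
            break  # accept this pre-trained model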
---
 .../cluster_submit_script_header.mssm_hpc.lsf |  2 +-
 mhcflurry/train_pan_allele_models_command.py  | 25 +++++++++++--------
 test/test_train_pan_allele_models_command.py  |  1 +
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
index 2844b313..16a8411d 100644
--- a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
+++ b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
@@ -5,7 +5,7 @@
 #BSUB -R span[hosts=1] # one node
 #BSUB -n 1 # number of compute cores
 #BSUB -W 36:00 # walltime in HH:MM
-#BSUB -R rusage[mem=60000] # mb memory requested
+#BSUB -R rusage[mem=30000] # mb memory requested
 #BSUB -o {work_dir}/%J.stdout # output log (%J : JobID)
 #BSUB -eo {work_dir}/%J.stderr # error log
 #BSUB -L /bin/bash # Initialize the execution environment
diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index 1ab50a0f..95264ca3 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -460,8 +460,6 @@ def train_model(
     train_alleles = AlleleEncoding(
         train_data.allele.values, borrow_from=allele_encoding)
 
-    model = Class1NeuralNetwork(**hyperparameters)
-
     progress_preamble = (
         "[task %2d / %2d]: "
         "[%2d / %2d folds] "
@@ -479,9 +477,7 @@ def train_model(
     print("%s [pid %d]. Hyperparameters:" % (progress_preamble, os.getpid()))
     pprint.pprint(hyperparameters)
 
-    assert model.network() is None
     if hyperparameters.get("train_data", {}).get("pretrain", False):
-        generator = pretrain_data_iterator(pretrain_data_filename, allele_encoding)
         pretrain_patience = hyperparameters["train_data"].get(
             "pretrain_patience", 10)
         pretrain_min_delta = hyperparameters["train_data"].get(
@@ -491,7 +487,7 @@ def train_model(
         pretrain_max_epochs = hyperparameters["train_data"].get(
             "pretrain_max_epochs", 1000)
 
-        max_val_loss =  hyperparameters["train_data"].get("pretrain_max_val_loss")
+        max_val_loss = hyperparameters["train_data"].get("pretrain_max_val_loss")
 
         attempt = 0
         while True:
@@ -500,6 +496,11 @@ def train_model(
             if attempt > 10:
                 print("Too many pre-training attempts! Stopping pretraining.")
                 break
+
+            model = Class1NeuralNetwork(**hyperparameters)
+            assert model.network() is None
+            generator = pretrain_data_iterator(
+                pretrain_data_filename, allele_encoding)
             model.fit_generator(
                 generator,
                 validation_peptide_encoding=train_peptides,
@@ -512,14 +513,17 @@ def train_model(
                 epochs=pretrain_max_epochs,
                 verbose=verbose,
             )
+            model.fit_info[-1].setdefault(
+                "training_info", {})["pretrain_attempt"] = attempt
             if not max_val_loss:
                 break
-            if model.fit_info[-1]["val_loss"] >= max_val_loss:
+            final_val_loss = model.fit_info[-1]["val_loss"][-1]
+            if final_val_loss >= max_val_loss:
                 print("Val loss %f >= max val loss %f. Pre-training again." % (
-                    model.fit_info[-1]["val_loss"], max_val_loss))
+                    final_val_loss, max_val_loss))
             else:
                 print("Val loss %f < max val loss %f. Done pre-training." % (
-                    model.fit_info[-1]["val_loss"], max_val_loss))
+                    final_val_loss, max_val_loss))
                 break
 
         # Use a smaller learning rate for training on real data
@@ -541,7 +545,8 @@ def train_model(
     train_peptide_hash = hashlib.sha1()
     for peptide in sorted(train_data.peptide.values):
         train_peptide_hash.update(peptide.encode())
-    model.fit_info[-1]["training_info"] = {
+
+    model.fit_info[-1].setdefault("training_info", {}).update({
         "fold_num": fold_num,
         "num_folds": num_folds,
         "replicate_num": replicate_num,
@@ -549,7 +554,7 @@ def train_model(
         "architecture_num": architecture_num,
         "num_architectures": num_architectures,
         "train_peptide_hash": train_peptide_hash.hexdigest(),
-    }
+    })
 
     numpy.testing.assert_equal(
         predictor.manifest_df.shape[0], len(predictor.class1_pan_allele_models))
diff --git a/test/test_train_pan_allele_models_command.py b/test/test_train_pan_allele_models_command.py
index f4255f76..52c747b0 100644
--- a/test/test_train_pan_allele_models_command.py
+++ b/test/test_train_pan_allele_models_command.py
@@ -95,6 +95,7 @@ HYPERPARAMETERS_LIST = [
         "pretrain": True,
         'pretrain_peptides_per_epoch': 128,
         'pretrain_max_epochs': 2,
+        'pretrain_max_val_loss': 0.1,
     },
     'validation_split': 0.1,
 },
-- 
GitLab