From bf54628ab0214f96e0368302561674d4e0fd1be6 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Sat, 13 Jul 2019 14:53:29 -0400 Subject: [PATCH] fix --- .../cluster_submit_script_header.mssm_hpc.lsf | 2 +- mhcflurry/train_pan_allele_models_command.py | 25 +++++++++++-------- test/test_train_pan_allele_models_command.py | 1 + 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf index 2844b313..16a8411d 100644 --- a/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf +++ b/downloads-generation/models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf @@ -5,7 +5,7 @@ #BSUB -R span[hosts=1] # one node #BSUB -n 1 # number of compute cores #BSUB -W 36:00 # walltime in HH:MM -#BSUB -R rusage[mem=60000] # mb memory requested +#BSUB -R rusage[mem=30000] # mb memory requested #BSUB -o {work_dir}/%J.stdout # output log (%J : JobID) #BSUB -eo {work_dir}/%J.stderr # error log #BSUB -L /bin/bash # Initialize the execution environment diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py index 1ab50a0f..95264ca3 100644 --- a/mhcflurry/train_pan_allele_models_command.py +++ b/mhcflurry/train_pan_allele_models_command.py @@ -460,8 +460,6 @@ def train_model( train_alleles = AlleleEncoding( train_data.allele.values, borrow_from=allele_encoding) - model = Class1NeuralNetwork(**hyperparameters) - progress_preamble = ( "[task %2d / %2d]: " "[%2d / %2d folds] " @@ -479,9 +477,7 @@ def train_model( print("%s [pid %d]. Hyperparameters:" % (progress_preamble, os.getpid())) pprint.pprint(hyperparameters) - assert model.network() is None if hyperparameters.get("train_data", {}).get("pretrain", False): - generator = pretrain_data_iterator(pretrain_data_filename, allele_encoding) pretrain_patience = hyperparameters["train_data"].get( "pretrain_patience", 10) pretrain_min_delta = hyperparameters["train_data"].get( @@ -491,7 +487,7 @@ def train_model( pretrain_max_epochs = hyperparameters["train_data"].get( "pretrain_max_epochs", 1000) - max_val_loss = hyperparameters["train_data"].get("pretrain_max_val_loss") + max_val_loss = hyperparameters["train_data"].get("pretrain_max_val_loss") attempt = 0 while True: @@ -500,6 +496,11 @@ def train_model( if attempt > 10: print("Too many pre-training attempts! Stopping pretraining.") break + + model = Class1NeuralNetwork(**hyperparameters) + assert model.network() is None + generator = pretrain_data_iterator( + pretrain_data_filename, allele_encoding) model.fit_generator( generator, validation_peptide_encoding=train_peptides, @@ -512,14 +513,17 @@ def train_model( epochs=pretrain_max_epochs, verbose=verbose, ) + model.fit_info[-1].setdefault( + "training_info", {})["pretrain_attempt"] = attempt if not max_val_loss: break - if model.fit_info[-1]["val_loss"] >= max_val_loss: + final_val_loss = model.fit_info[-1]["val_loss"][-1] + if final_val_loss >= max_val_loss: print("Val loss %f >= max val loss %f. Pre-training again." % ( - model.fit_info[-1]["val_loss"], max_val_loss)) + final_val_loss, max_val_loss)) else: print("Val loss %f < max val loss %f. Done pre-training." % ( - model.fit_info[-1]["val_loss"], max_val_loss)) + final_val_loss, max_val_loss)) break # Use a smaller learning rate for training on real data @@ -541,7 +545,8 @@ def train_model( train_peptide_hash = hashlib.sha1() for peptide in sorted(train_data.peptide.values): train_peptide_hash.update(peptide.encode()) - model.fit_info[-1]["training_info"] = { + + model.fit_info[-1].setdefault("training_info", {}).update({ "fold_num": fold_num, "num_folds": num_folds, "replicate_num": replicate_num, @@ -549,7 +554,7 @@ def train_model( "architecture_num": architecture_num, "num_architectures": num_architectures, "train_peptide_hash": train_peptide_hash.hexdigest(), - } + }) numpy.testing.assert_equal( predictor.manifest_df.shape[0], len(predictor.class1_pan_allele_models)) diff --git a/test/test_train_pan_allele_models_command.py b/test/test_train_pan_allele_models_command.py index f4255f76..52c747b0 100644 --- a/test/test_train_pan_allele_models_command.py +++ b/test/test_train_pan_allele_models_command.py @@ -95,6 +95,7 @@ HYPERPARAMETERS_LIST = [ "pretrain": True, 'pretrain_peptides_per_epoch': 128, 'pretrain_max_epochs': 2, + 'pretrain_max_val_loss': 0.1, }, 'validation_split': 0.1, }, -- GitLab