From f472d4b71a24f43fea7add5ffe62b3ac0dddfa00 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Fri, 9 Feb 2018 17:31:32 -0500
Subject: [PATCH] update comments

---
 mhcflurry/class1_affinity_predictor.py       | 18 +++++++---
 mhcflurry/class1_neural_network.py           | 29 +++++++++------
 .../train_allele_specific_models_command.py  | 35 ++++++++++++-------
 3 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 5cf8bf5f..aa41f0cf 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -400,7 +400,8 @@ class Class1AffinityPredictor(object):
             inequalities=None,
             models_dir_for_save=None,
             verbose=0,
-            progress_preamble=""):
+            progress_preamble="",
+            progress_print_interval=5.0):
         """
         Fit one or more allele specific predictors for a single allele using a
         single neural network architecture.
@@ -438,6 +439,9 @@ class Class1AffinityPredictor(object):
         progress_preamble : string
             Optional string of information to include in each progress update
 
+        progress_print_interval : float
+            How often (in seconds) to print progress. Set to None to disable.
+
         Returns
         -------
         list of `Class1NeuralNetwork`
@@ -488,7 +492,8 @@ class Class1AffinityPredictor(object):
                     model_num=model_num + 1,
                     n_models=n_models,
                     architecture_num=architecture_num + 1,
-                    n_architectures=n_architectures))
+                    n_architectures=n_architectures),
+                progress_print_interval=progress_print_interval)
 
             if n_architectures > 1:
                 # We require val_loss (i.e. a validation set) if we have
@@ -532,7 +537,8 @@ class Class1AffinityPredictor(object):
             inequalities,
             models_dir_for_save=None,
             verbose=1,
-            progress_preamble=""):
+            progress_preamble="",
+            progress_print_interval=5.0):
         """
         Fit one or more pan-allele predictors using a single neural network
         architecture.
@@ -568,6 +574,9 @@ class Class1AffinityPredictor(object):
         progress_preamble : string
             Optional string of information to include in each progress update
 
+        progress_print_interval : float
+            How often (in seconds) to print progress. Set to None to disable.
+
         Returns
         -------
         list of `Class1NeuralNetwork`
@@ -589,7 +598,8 @@ class Class1AffinityPredictor(object):
                 inequalities=inequalities,
                 allele_encoding=allele_encoding,
                 verbose=verbose,
-                progress_preamble=progress_preamble)
+                progress_preamble=progress_preamble,
+                progress_print_interval=progress_print_interval)
 
             model_name = self.model_name("pan-class1", i)
             self.class1_pan_allele_models.append(model)
diff --git a/mhcflurry/class1_neural_network.py b/mhcflurry/class1_neural_network.py
index 6ce50ffc..4b9e7878 100644
--- a/mhcflurry/class1_neural_network.py
+++ b/mhcflurry/class1_neural_network.py
@@ -418,7 +418,8 @@ class Class1NeuralNetwork(object):
             sample_weights=None,
             shuffle_permutation=None,
             verbose=1,
-            progress_preamble=""):
+            progress_preamble="",
+            progress_print_interval=5.0):
         """
         Fit the neural network.
 
@@ -454,6 +455,10 @@ class Class1NeuralNetwork(object):
         progress_preamble : string
             Optional string of information to include in each progress update
+
+        progress_print_interval : float
+            How often (in seconds) to print progress update. Set to None to
+            disable.
         """
         self.fit_num_points = len(peptides)
 
@@ -673,7 +678,10 @@ class Class1NeuralNetwork(object):
                 self.loss_history[key].extend(value)
 
             # Print progress no more often than once every few seconds.
-            if not last_progress_print or time.time() - last_progress_print > 5:
+            if progress_print_interval is not None and (
+                    not last_progress_print or (
+                        time.time() - last_progress_print
+                        > progress_print_interval)):
                 print((progress_preamble + " " +
                        "Epoch %3d / %3d: loss=%g. "
                        "Min val loss (%s) at epoch %s" % (
@@ -697,14 +705,15 @@ class Class1NeuralNetwork(object):
                     min_val_loss_iteration + self.hyperparameters['patience'])
 
                 if i > threshold:
-                    print((progress_preamble + " " +
-                           "Stopping at epoch %3d / %3d: loss=%g. "
-                           "Min val loss (%s) at epoch %s" % (
-                               i,
-                               self.hyperparameters['max_epochs'],
-                               self.loss_history['loss'][-1],
-                               str(min_val_loss),
-                               min_val_loss_iteration)).strip())
+                    if progress_print_interval is not None:
+                        print((progress_preamble + " " +
+                               "Stopping at epoch %3d / %3d: loss=%g. "
+                               "Min val loss (%s) at epoch %s" % (
+                                   i,
+                                   self.hyperparameters['max_epochs'],
+                                   self.loss_history['loss'][-1],
+                                   str(min_val_loss),
+                                   min_val_loss_iteration)).strip())
                     break
 
         self.fit_seconds = time.time() - start
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index 4b525127..9396230b 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -26,7 +26,7 @@ from .common import configure_logging, set_keras_backend
 # parallel, we use this global variable as a place to store data. Data that is
 # stored here before creating the thread pool will be inherited to the child
 # processes upon fork() call, allowing us to share large data with the workers
-# efficiently.
+# via shared memory.
 GLOBAL_DATA = {}
 
 
@@ -119,7 +119,11 @@ parser.add_argument(
     help="Keras backend. If not specified will use system default.")
 parser.add_argument(
     "--gpus",
-    type=int)
+    type=int,
+    metavar="N",
+    help="Number of GPUs to attempt to parallelize across. Requires running "
+    "in parallel.")
+
 
 def run(argv=sys.argv[1:]):
     global GLOBAL_DATA
@@ -174,28 +178,30 @@ def run(argv=sys.argv[1:]):
     predictor = Class1AffinityPredictor()
 
     if args.num_jobs[0] == 1:
-        # Serial run
+        # Serial run.
        print("Running in serial.")
         worker_pool = None
         if args.backend:
             set_keras_backend(args.backend)
     else:
+        # Parallel run.
         env_queue = None
         if args.gpus:
+            print("Attempting to round-robin assign each worker a GPU.")
+
+            # We assign each worker to a GPU using the CUDA_VISIBLE_DEVICES
+            # environment variable. To do this, we push environment variables
+            # onto a queue. Each worker reads a single item from the queue,
+            # which is a list of environment variables to set.
             next_device = itertools.cycle([
-                "%d" % num
-                for num in range(args.gpus)
+                "%d" % num for num in range(args.gpus)
             ])
-            queue_items = []
+            env_queue = Queue()
             for num in range(args.num_jobs[0]):
-                queue_items.append([
+                item = [
                     ("CUDA_VISIBLE_DEVICES", next(next_device)),
-                ])
-
-            print("Attempting to round-robin assign each worker a GPU", queue_items)
-            env_queue = Queue()
-            for item in queue_items:
+                ]
                 env_queue.put(item)
 
         worker_pool = Pool(
@@ -238,6 +244,7 @@ def run(argv=sys.argv[1:]):
             'data': None,  # subselect from GLOBAL_DATA["train_data"]
             'hyperparameters': hyperparameters,
             'verbose': args.verbosity,
+            'progress_print_interval': None if worker_pool else 5.0,
             'predictor': predictor if not worker_pool else None,
             'save_to': args.out_models_dir if not worker_pool else None,
         }
@@ -361,6 +368,7 @@ def train_model(
         data,
         hyperparameters,
         verbose,
+        progress_print_interval,
         predictor,
         save_to):
 
@@ -395,6 +403,7 @@ def train_model(
             if "measurement_inequality" in train_data.columns
             else None),
         models_dir_for_save=save_to,
         progress_preamble=progress_preamble,
+        progress_print_interval=progress_print_interval,
         verbose=verbose)
 
     if allele_num == 0 and model_group == 0:
@@ -427,6 +436,8 @@ def calibrate_percentile_ranks(allele, predictor, peptides=None):
 
 def worker_init(env_queue=None):
     global GLOBAL_DATA
+    # The env_queue provides a way for each worker to be configured with a
+    # specific set of environment variables. We use it to assign GPUs to workers.
     if env_queue:
         settings = env_queue.get()
         print("Setting: ", settings)
--
GitLab
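
Note on the new progress_print_interval behavior: the gate added in class1_neural_network.py prints progress at most once per interval and skips printing entirely when the interval is None (which the training command now passes for pool workers). The following is a standalone sketch of that throttling logic, not mhcflurry code; the run_epochs function and its timings are invented for illustration.

    import time

    def run_epochs(n_epochs, progress_preamble="", progress_print_interval=5.0):
        # Stand-in training loop; each iteration represents one epoch.
        last_progress_print = None
        for i in range(n_epochs):
            time.sleep(0.01)  # pretend to train for a bit
            # Same gate as the patch: never print when the interval is None,
            # otherwise print at most once per interval.
            if progress_print_interval is not None and (
                    not last_progress_print or (
                        time.time() - last_progress_print
                        > progress_print_interval)):
                print((progress_preamble + " Epoch %3d / %3d" % (
                    i + 1, n_epochs)).strip())
                last_progress_print = time.time()

    run_epochs(50, progress_preamble="demo", progress_print_interval=0.2)
    run_epochs(50, progress_print_interval=None)  # silent, as in pool workers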
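The GPU round-robin change in train_allele_specific_models_command.py is easier to read as straight code than as a diff. Below is a self-contained sketch of the same pattern: CUDA_VISIBLE_DEVICES values are cycled onto a queue and each pool worker pops one item in its initializer. The show_device helper and the GPU/job counts are invented for the example, and, like the command itself, it assumes the fork start method, since multiprocessing.Queue objects are shared with pool workers by inheritance rather than pickling.

    import itertools
    import os
    from multiprocessing import Pool, Queue

    def worker_init(env_queue=None):
        # Each worker pops one item: a list of (name, value) environment variables.
        if env_queue:
            settings = env_queue.get()
            print("Setting: ", settings)
            for name, value in settings:
                os.environ[name] = value

    def show_device(_):
        # Report which GPU this worker process was assigned.
        return os.getpid(), os.environ.get("CUDA_VISIBLE_DEVICES")

    if __name__ == "__main__":
        gpus, num_jobs = 2, 4
        next_device = itertools.cycle(["%d" % num for num in range(gpus)])
        env_queue = Queue()
        for _ in range(num_jobs):
            env_queue.put([("CUDA_VISIBLE_DEVICES", next(next_device))])
        with Pool(processes=num_jobs, initializer=worker_init,
                  initargs=(env_queue,)) as pool:
            print(pool.map(show_device, range(num_jobs)))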