Skip to content
Snippets Groups Projects
Commit f472d4b7 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

update comments

parent c0479deb
No related merge requests found
......@@ -400,7 +400,8 @@ class Class1AffinityPredictor(object):
inequalities=None,
models_dir_for_save=None,
verbose=0,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit one or more allele specific predictors for a single allele using a
single neural network architecture.
......@@ -438,6 +439,9 @@ class Class1AffinityPredictor(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress. Set to None to disable.
Returns
-------
list of `Class1NeuralNetwork`
......@@ -488,7 +492,8 @@ class Class1AffinityPredictor(object):
model_num=model_num + 1,
n_models=n_models,
architecture_num=architecture_num + 1,
n_architectures=n_architectures))
n_architectures=n_architectures),
progress_print_interval=progress_print_interval)
if n_architectures > 1:
# We require val_loss (i.e. a validation set) if we have
......@@ -532,7 +537,8 @@ class Class1AffinityPredictor(object):
inequalities,
models_dir_for_save=None,
verbose=1,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit one or more pan-allele predictors using a single neural network
architecture.
......@@ -568,6 +574,9 @@ class Class1AffinityPredictor(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress. Set to None to disable.
Returns
-------
list of `Class1NeuralNetwork`
......@@ -589,7 +598,8 @@ class Class1AffinityPredictor(object):
inequalities=inequalities,
allele_encoding=allele_encoding,
verbose=verbose,
progress_preamble=progress_preamble)
progress_preamble=progress_preamble,
progress_print_interval=progress_print_interval)
model_name = self.model_name("pan-class1", i)
self.class1_pan_allele_models.append(model)
......
......@@ -418,7 +418,8 @@ class Class1NeuralNetwork(object):
sample_weights=None,
shuffle_permutation=None,
verbose=1,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit the neural network.
......@@ -454,6 +455,10 @@ class Class1NeuralNetwork(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress update. Set to None to
disable.
"""
self.fit_num_points = len(peptides)
......@@ -673,7 +678,10 @@ class Class1NeuralNetwork(object):
self.loss_history[key].extend(value)
# Print progress no more often than once every progress_print_interval
# seconds (never, if progress_print_interval is None).
if not last_progress_print or time.time() - last_progress_print > 5:
if progress_print_interval is not None and (
not last_progress_print or (
time.time() - last_progress_print
> progress_print_interval)):
print((progress_preamble + " " +
"Epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
......@@ -697,14 +705,15 @@ class Class1NeuralNetwork(object):
min_val_loss_iteration +
self.hyperparameters['patience'])
if i > threshold:
print((progress_preamble + " " +
"Stopping at epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
self.loss_history['loss'][-1],
str(min_val_loss),
min_val_loss_iteration)).strip())
if progress_print_interval is not None:
print((progress_preamble + " " +
"Stopping at epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
self.loss_history['loss'][-1],
str(min_val_loss),
min_val_loss_iteration)).strip())
break
self.fit_seconds = time.time() - start
......
......@@ -26,7 +26,7 @@ from .common import configure_logging, set_keras_backend
# parallel, we use this global variable as a place to store data. Data that is
# stored here before creating the process pool will be inherited by the child
# processes upon fork() call, allowing us to share large data with the workers
# efficiently.
# via shared memory.
GLOBAL_DATA = {}
......@@ -119,7 +119,11 @@ parser.add_argument(
help="Keras backend. If not specified will use system default.")
parser.add_argument(
"--gpus",
type=int)
type=int,
metavar="N",
help="Number of GPUs to attempt to parallelize across. Requires running "
"in parallel.")
def run(argv=sys.argv[1:]):
global GLOBAL_DATA
......@@ -174,28 +178,30 @@ def run(argv=sys.argv[1:]):
predictor = Class1AffinityPredictor()
if args.num_jobs[0] == 1:
# Serial run
# Serial run.
print("Running in serial.")
worker_pool = None
if args.backend:
set_keras_backend(args.backend)
else:
# Parallel run.
env_queue = None
if args.gpus:
print("Attempting to round-robin assign each worker a GPU.")
# We assign each worker to a GPU using the CUDA_VISIBLE_DEVICES
# environment variable. To do this, we push environment variables
# onto a queue. Each worker reads a single item from the queue,
# which is a list of environment variables to set.
next_device = itertools.cycle([
"%d" % num
for num in range(args.gpus)
"%d" % num for num in range(args.gpus)
])
queue_items = []
env_queue = Queue()
for num in range(args.num_jobs[0]):
queue_items.append([
item = [
("CUDA_VISIBLE_DEVICES", next(next_device)),
])
print("Attempting to round-robin assign each worker a GPU", queue_items)
env_queue = Queue()
for item in queue_items:
]
env_queue.put(item)
worker_pool = Pool(
......@@ -238,6 +244,7 @@ def run(argv=sys.argv[1:]):
'data': None, # subselect from GLOBAL_DATA["train_data"]
'hyperparameters': hyperparameters,
'verbose': args.verbosity,
'progress_print_interval': None if worker_pool else 5.0,
'predictor': predictor if not worker_pool else None,
'save_to': args.out_models_dir if not worker_pool else None,
}
......@@ -361,6 +368,7 @@ def train_model(
data,
hyperparameters,
verbose,
progress_print_interval,
predictor,
save_to):
......@@ -395,6 +403,7 @@ def train_model(
if "measurement_inequality" in train_data.columns else None),
models_dir_for_save=save_to,
progress_preamble=progress_preamble,
progress_print_interval=progress_print_interval,
verbose=verbose)
if allele_num == 0 and model_group == 0:
......@@ -427,6 +436,8 @@ def calibrate_percentile_ranks(allele, predictor, peptides=None):
def worker_init(env_queue=None):
global GLOBAL_DATA
# The env_queue provides a way for each worker to be configured with a
# specific set of environment variables. We use it to assign GPUs to workers.
if env_queue:
settings = env_queue.get()
print("Setting: ", settings)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment