Skip to content
Snippets Groups Projects
Commit f472d4b7 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

update comments

parent c0479deb
No related merge requests found
......@@ -400,7 +400,8 @@ class Class1AffinityPredictor(object):
inequalities=None,
models_dir_for_save=None,
verbose=0,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit one or more allele specific predictors for a single allele using a
single neural network architecture.
......@@ -438,6 +439,9 @@ class Class1AffinityPredictor(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress. Set to None to disable.
Returns
-------
list of `Class1NeuralNetwork`
......@@ -488,7 +492,8 @@ class Class1AffinityPredictor(object):
model_num=model_num + 1,
n_models=n_models,
architecture_num=architecture_num + 1,
n_architectures=n_architectures))
n_architectures=n_architectures),
progress_print_interval=progress_print_interval)
if n_architectures > 1:
# We require val_loss (i.e. a validation set) if we have
......@@ -532,7 +537,8 @@ class Class1AffinityPredictor(object):
inequalities,
models_dir_for_save=None,
verbose=1,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit one or more pan-allele predictors using a single neural network
architecture.
......@@ -568,6 +574,9 @@ class Class1AffinityPredictor(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress. Set to None to disable.
Returns
-------
list of `Class1NeuralNetwork`
......@@ -589,7 +598,8 @@ class Class1AffinityPredictor(object):
inequalities=inequalities,
allele_encoding=allele_encoding,
verbose=verbose,
progress_preamble=progress_preamble)
progress_preamble=progress_preamble,
progress_print_interval=progress_print_interval)
model_name = self.model_name("pan-class1", i)
self.class1_pan_allele_models.append(model)
......
......@@ -418,7 +418,8 @@ class Class1NeuralNetwork(object):
sample_weights=None,
shuffle_permutation=None,
verbose=1,
progress_preamble=""):
progress_preamble="",
progress_print_interval=5.0):
"""
Fit the neural network.
......@@ -454,6 +455,10 @@ class Class1NeuralNetwork(object):
progress_preamble : string
Optional string of information to include in each progress update
progress_print_interval : float
How often (in seconds) to print progress update. Set to None to
disable.
"""
self.fit_num_points = len(peptides)
......@@ -673,7 +678,10 @@ class Class1NeuralNetwork(object):
self.loss_history[key].extend(value)
# Print progress no more often than once every progress_print_interval
# seconds (never, if progress_print_interval is None).
if not last_progress_print or time.time() - last_progress_print > 5:
if progress_print_interval is not None and (
not last_progress_print or (
time.time() - last_progress_print
> progress_print_interval)):
print((progress_preamble + " " +
"Epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
......@@ -697,14 +705,15 @@ class Class1NeuralNetwork(object):
min_val_loss_iteration +
self.hyperparameters['patience'])
if i > threshold:
print((progress_preamble + " " +
"Stopping at epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
self.loss_history['loss'][-1],
str(min_val_loss),
min_val_loss_iteration)).strip())
if progress_print_interval is not None:
print((progress_preamble + " " +
"Stopping at epoch %3d / %3d: loss=%g. "
"Min val loss (%s) at epoch %s" % (
i,
self.hyperparameters['max_epochs'],
self.loss_history['loss'][-1],
str(min_val_loss),
min_val_loss_iteration)).strip())
break
self.fit_seconds = time.time() - start
......
......@@ -26,7 +26,7 @@ from .common import configure_logging, set_keras_backend
# parallel, we use this global variable as a place to store data. Data that is
# stored here before creating the process pool will be inherited by the child
# processes upon fork() call, allowing us to share large data with the workers
# efficiently.
# via shared memory.
GLOBAL_DATA = {}
......@@ -119,7 +119,11 @@ parser.add_argument(
help="Keras backend. If not specified will use system default.")
parser.add_argument(
"--gpus",
type=int)
type=int,
metavar="N",
help="Number of GPUs to attempt to parallelize across. Requires running "
"in parallel.")
def run(argv=sys.argv[1:]):
global GLOBAL_DATA
......@@ -174,28 +178,30 @@ def run(argv=sys.argv[1:]):
predictor = Class1AffinityPredictor()
if args.num_jobs[0] == 1:
# Serial run
# Serial run.
print("Running in serial.")
worker_pool = None
if args.backend:
set_keras_backend(args.backend)
else:
# Parallel run.
env_queue = None
if args.gpus:
print("Attempting to round-robin assign each worker a GPU.")
# We assign each worker to a GPU using the CUDA_VISIBLE_DEVICES
# environment variable. To do this, we push environment variables
# onto a queue. Each worker reads a single item from the queue,
# which is a list of environment variables to set.
next_device = itertools.cycle([
"%d" % num
for num in range(args.gpus)
"%d" % num for num in range(args.gpus)
])
queue_items = []
env_queue = Queue()
for num in range(args.num_jobs[0]):
queue_items.append([
item = [
("CUDA_VISIBLE_DEVICES", next(next_device)),
])
print("Attempting to round-robin assign each worker a GPU", queue_items)
env_queue = Queue()
for item in queue_items:
]
env_queue.put(item)
worker_pool = Pool(
......@@ -238,6 +244,7 @@ def run(argv=sys.argv[1:]):
'data': None, # subselect from GLOBAL_DATA["train_data"]
'hyperparameters': hyperparameters,
'verbose': args.verbosity,
'progress_print_interval': None if worker_pool else 5.0,
'predictor': predictor if not worker_pool else None,
'save_to': args.out_models_dir if not worker_pool else None,
}
......@@ -361,6 +368,7 @@ def train_model(
data,
hyperparameters,
verbose,
progress_print_interval,
predictor,
save_to):
......@@ -395,6 +403,7 @@ def train_model(
if "measurement_inequality" in train_data.columns else None),
models_dir_for_save=save_to,
progress_preamble=progress_preamble,
progress_print_interval=progress_print_interval,
verbose=verbose)
if allele_num == 0 and model_group == 0:
......@@ -427,6 +436,8 @@ def calibrate_percentile_ranks(allele, predictor, peptides=None):
def worker_init(env_queue=None):
global GLOBAL_DATA
# The env_queue provides a way for each worker to be configured with a
# specific set of environment variables. We use it to assign GPUs to workers.
if env_queue:
settings = env_queue.get()
print("Setting: ", settings)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment