diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index 138dcdba75df1bc83252a7d4e212c1fc03612dbc..87fe6b019f2b3add80615fee68671aa50051904e 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -99,16 +99,16 @@ parser.add_argument(
     default=1,
     type=int,
     metavar="N",
-    help="Parallelization jobs. Experimental. "
-    "Set to 1 for serial run. Set to 0 to use number of cores. "
+    help="Number of processes to parallelize training over. "
+    "Set to 1 for serial run. Set to 0 to use number of cores. Experimental."
     "Default: %(default)s.")
 parser.add_argument(
     "--calibration-num-jobs",
     default=1,
     type=int,
     metavar="N",
-    help="Parallelization jobs. Experimental. "
-    "Set to 1 for serial run. Set to 0 to use number of cores. "
+    help="Number of processes to parallelize percent rank calibration over. "
+    "Set to 1 for serial run. Set to 0 to use number of cores. Experimental."
     "Default: %(default)s.")
 parser.add_argument(
     "--backend",
@@ -186,6 +186,7 @@ def run(argv=sys.argv[1:]):
         print("Done.")
 
     start = time.time()
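+    # Accumulate work items across all hyperparameter sets so the full list
+    # can be dispatched to the worker pool (or run serially) in a single pass.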
+    work_items = []
     for (h, hyperparameters) in enumerate(hyperparameters_lst):
         n_models = None
         if 'n_models' in hyperparameters:
@@ -198,7 +199,6 @@ def run(argv=sys.argv[1:]):
         if args.max_epochs:
             hyperparameters['max_epochs'] = args.max_epochs
 
-        work_items = []
         for (i, allele) in enumerate(df.allele.unique()):
             for model_group in range(n_models):
                 work_dict = {
@@ -217,30 +217,29 @@ def run(argv=sys.argv[1:]):
                 }
                 work_items.append(work_dict)
 
-        if worker_pool:
-            print("Processing %d work items in parallel." % len(work_items))
-
-            predictors = list(
-                tqdm.tqdm(
-                    worker_pool.imap_unordered(
-                        train_model_entrypoint, work_items, chunksize=1),
-                    ascii=True,
-                    total=len(work_items)))
-
-            print("Merging %d predictors fit in parallel." % (len(predictors)))
-            predictor = Class1AffinityPredictor.merge([predictor] + predictors)
-            print("Saving merged predictor to: %s" % args.out_models_dir)
-            predictor.save(args.out_models_dir)
-        else:
-            # Run in serial. In this case, every worker is passed the same predictor,
-            # which it adds models to, so no merging is required. It also saves
-            # as it goes so no saving is required at the end.
-            start = time.time()
-            for _ in tqdm.trange(len(work_items)):
-                item = work_items.pop(0)  # want to keep freeing up memory
-                work_predictor = train_model_entrypoint(item)
-                assert work_predictor is predictor
-            assert not work_items
+    if worker_pool:
+        print("Processing %d work items in parallel." % len(work_items))
+        predictors = list(
+            tqdm.tqdm(
+                worker_pool.imap_unordered(
+                    train_model_entrypoint, work_items, chunksize=1),
+                ascii=True,
+                total=len(work_items)))
+
+        print("Merging %d predictors fit in parallel." % (len(predictors)))
+        predictor = Class1AffinityPredictor.merge([predictor] + predictors)
+        print("Saving merged predictor to: %s" % args.out_models_dir)
+        predictor.save(args.out_models_dir)
+    else:
+        # Run in serial. In this case, every worker is passed the same predictor,
+        # which it adds models to, so no merging is required. It also saves
+        # as it goes so no saving is required at the end.
+        start = time.time()
+        for _ in tqdm.trange(len(work_items)):
+            item = work_items.pop(0)  # want to keep freeing up memory
+            work_predictor = train_model_entrypoint(item)
+            assert work_predictor is predictor
+        assert not work_items
 
     print("*" * 30)
     training_time = time.time() - start
@@ -252,18 +251,18 @@ def run(argv=sys.argv[1:]):
         worker_pool.close()
         worker_pool.join()
 
+    start = time.time()
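+    # `start` is read again after the calibration block below, so the reported
+    # percent_rank_calibration_time covers calibration for all alleles.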
     if args.percent_rank_calibration_num_peptides_per_length > 0:
         alleles = list(predictor.supported_alleles)
         first_allele = alleles.pop(0)
 
         print("Performing percent rank calibration. Calibrating first allele.")
-        start = time.time()
         encoded_peptides = predictor.calibrate_percentile_ranks(
             alleles=[first_allele],
             num_peptides_per_length=args.percent_rank_calibration_num_peptides_per_length)
-        percent_rank_calibration_time = time.time() - start
+        assert encoded_peptides.encoding_cache  # must have cached the encoding
         print("Finished calibrating percent ranks for first allele in %0.2f sec." % (
-            percent_rank_calibration_time))
+            time.time() - start))
         print("Calibrating %d additional alleles." % len(alleles))
 
         if args.calibration_num_jobs == 1:
@@ -298,6 +297,8 @@ def run(argv=sys.argv[1:]):
         print("Done calibrating %d additional alleles." % len(alleles))
         predictor.save(args.out_models_dir, model_names_to_write=[])
 
+    percent_rank_calibration_time = time.time() - start
+
     if worker_pool:
         worker_pool.close()
         worker_pool.join()
@@ -360,6 +361,9 @@ def train_model(
 
     if allele_num == 0 and model_group == 0:
         # For the first model for the first allele, print the architecture.
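+        # Also print the hyperparameters for this set before the architecture summary.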
+        print("*** HYPERPARAMETER SET %d***" %
+              (hyperparameter_set_num + 1))
+        print(hyperparameters)
         print("*** ARCHITECTURE FOR HYPERPARAMETER SET %d***" %
               (hyperparameter_set_num + 1))
         model.network(borrow=True).summary()