diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
index d3d232f7ed81e3e804b7481f8eb6f6eeafbd8946..d8b70a64532a95ee399aced3412e0ed0d1e1e944 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
@@ -61,7 +61,7 @@ do
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
         --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
         --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
-        --ensemble-size 4 \
+        --num-folds 4 \
         --hyperparameters hyperparameters.yaml \
         --out-models-dir $(pwd)/models.${kind} \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
index 51676bf09fdc1c4bcb6f3ce83924d481c9736562..c5799bb9c7fa43b23d7011a5f0fddb972cce4cbb 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
@@ -73,7 +73,7 @@ do
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
         --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
         --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
-        --ensemble-size 4 \
+        --num-folds 4 \
         --hyperparameters hyperparameters.yaml \
         --out-models-dir models.${kind} \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
diff --git a/mhcflurry/select_pan_allele_models_command.py b/mhcflurry/select_pan_allele_models_command.py
index c931632b83643551e8323e68d2a738e035044a3f..b99657f467eebd419a74285785e88e96b3cb84e2 100644
--- a/mhcflurry/select_pan_allele_models_command.py
+++ b/mhcflurry/select_pan_allele_models_command.py
@@ -51,11 +51,6 @@ parser.add_argument(
     help=(
         "Model selection data CSV. Expected columns: "
         "allele, peptide, measurement_value"))
-parser.add_argument(
-    "--folds",
-    metavar="FILE.csv",
-    required=False,
-    help=(""))
 parser.add_argument(
     "--models-dir",
     metavar="DIR",
@@ -161,19 +156,6 @@ def run(argv=sys.argv[1:]):
 
     metadata_dfs = {}
 
-    if args.folds:
-        folds_df = pandas.read_csv(args.folds)
-        matches = all([
-            len(folds_df) == len(df),
-            (folds_df.peptide == df.peptide).all(),
-            (folds_df.allele == df.allele).all(),
-        ])
-        if not matches:
-            raise ValueError("Training data and fold data do not match")
-        fold_cols = [c for c in folds_df if c.startswith("fold_")]
-        for col in fold_cols:
-            df[col] = folds_df[col]
-
     fold_cols = [c for c in df if c.startswith("fold_")]
     num_folds = len(fold_cols)
     if num_folds <= 1:
@@ -193,8 +175,6 @@ def run(argv=sys.argv[1:]):
     df = df.loc[df.allele.isin(alleles)].dropna()
     print("Subselected to supported alleles: %s" % str(df.shape))
 
-    print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles)))
-
     metadata_dfs["model_selection_data"] = df
 
     df["mass_spec"] = df.measurement_source.str.contains(
@@ -248,13 +228,13 @@ def run(argv=sys.argv[1:]):
     if serial_run:
         # Serial run
         print("Running in serial.")
-        results = (do_model_select_task(item) for item in work_items)
+        results = (model_select(**item) for item in work_items)
     elif args.cluster_parallelism:
         # Run using separate processes HPC cluster.
         print("Running on cluster.")
         results = cluster_results_from_args(
             args,
-            work_function=do_model_select_task,
+            work_function=model_select,
             work_items=work_items,
             constant_data=GLOBAL_DATA,
             result_serialization_method="pickle")
@@ -268,7 +248,9 @@ def run(argv=sys.argv[1:]):
 
         # Parallel run
         results = worker_pool.imap_unordered(
-            do_model_select_task, work_items, chunksize=1)
+            do_model_select_task,
+            work_items,
+            chunksize=1)
 
     models_by_fold = {}
     summary_dfs = []
diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index f20c9e3e3586b42a0ec8a9a5bc3c7a44875ca76c..5947183e12bc07e03376a349c476c396596e22cf 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -84,11 +84,11 @@ parser.add_argument(
     default=False,
     help="Do not use affinity value inequalities even when present in data")
 parser.add_argument(
-    "--ensemble-size",
+    "--num-folds",
     type=int,
+    default=4,
     metavar="N",
-    help="Ensemble size, i.e. how many models to retain the final predictor. "
-    "In the current implementation, this is also the number of training folds.")
+    help="Number of training folds.")
 parser.add_argument(
     "--num-replicates",
     type=int,
@@ -296,7 +296,7 @@ def initialize_training(args):
         "data",
         "out_models_dir",
         "hyperparameters",
-        "ensemble_size",
+        "num_folds",
     ]
     for arg in required_arguments:
         if getattr(args, arg) is None:
@@ -338,7 +338,7 @@ def initialize_training(args):
 
     folds_df = assign_folds(
         df=df,
-        num_folds=args.ensemble_size,
+        num_folds=args.num_folds,
         held_out_fraction=held_out_fraction,
         held_out_max=held_out_max)
 
@@ -387,14 +387,14 @@ def initialize_training(args):
             if not args.pretrain_data:
                 raise ValueError("--pretrain-data is required")
 
-        for fold in range(args.ensemble_size):
+        for fold in range(args.num_folds):
             for replicate in range(args.num_replicates):
                 work_dict = {
                     'work_item_name': str(uuid.uuid4()),
                     'architecture_num': h,
                     'num_architectures': len(hyperparameters_lst),
                     'fold_num': fold,
-                    'num_folds': args.ensemble_size,
+                    'num_folds': args.num_folds,
                     'replicate_num': replicate,
                     'num_replicates': args.num_replicates,
                     'hyperparameters': hyperparameters,
diff --git a/test/test_train_pan_allele_models_command.py b/test/test_train_pan_allele_models_command.py
index 53c5bb254de227e455aa598fd35182e5e19901eb..c214ec56d8f83ea4343358d324b8fd4f67f21e35 100644
--- a/test/test_train_pan_allele_models_command.py
+++ b/test/test_train_pan_allele_models_command.py
@@ -2,6 +2,11 @@
 Tests for training and predicting using Class1 pan-allele models.
 """
 
+import logging
+logging.getLogger('tensorflow').disabled = True
+logging.getLogger('matplotlib').disabled = True
+
+
 import json
 import os
 import shutil
@@ -36,7 +41,7 @@ HYPERPARAMETERS_LIST = [
     'learning_rate': None,
     'locally_connected_layers': [],
     'loss': 'custom:mse_with_inequalities',
-    'max_epochs': 5,
+    'max_epochs': 0,  # never selected
     'minibatch_size': 256,
     'optimizer': 'rmsprop',
     'output_activation': 'sigmoid',
@@ -100,7 +105,7 @@ HYPERPARAMETERS_LIST = [
     },
     'validation_split': 0.1,
 },
-][1:]
+]
 
 
 def run_and_check(n_jobs=0, delete=True, additional_args=[]):
@@ -114,37 +119,47 @@ def run_and_check(n_jobs=0, delete=True, additional_args=[]):
         get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     selected_data_df = data_df.loc[data_df.allele.str.startswith("HLA-A")]
     selected_data_df.to_csv(
-        os.path.join(models_dir, "train_data.csv"), index=False)
+        os.path.join(models_dir, "_train_data.csv"), index=False)
 
     args = [
         "mhcflurry-class1-train-pan-allele-models",
-        "--data", os.path.join(models_dir, "train_data.csv"),
+        "--data", os.path.join(models_dir, "_train_data.csv"),
         "--allele-sequences", get_path("allele_sequences", "allele_sequences.csv"),
         "--hyperparameters", hyperparameters_filename,
         "--out-models-dir", models_dir,
         "--num-jobs", str(n_jobs),
-        "--ensemble-size", "2",
+        "--num-folds", "2",
         "--verbosity", "1",
-        # "--pretrain-data", get_path(
-        #      "random_peptide_predictions", "predictions.csv.bz2"),
     ] + additional_args
     print("Running with args: %s" % args)
     subprocess.check_call(args)
 
-    result = Class1AffinityPredictor.load(models_dir)
-    predictions = result.predict(
-        peptides=["SLYNTVATL"],
+    # Run model selection
+    models_dir_selected = tempfile.mkdtemp(
+        prefix="mhcflurry-test-models-selected")
+    args = [
+        "mhcflurry-class1-select-pan-allele-models",
+        "--data", os.path.join(models_dir, "train_data.csv.bz2"),
+        "--models-dir", models_dir,
+        "--out-models-dir", models_dir_selected,
+        "--max-models", "1",
+        "--num-jobs", str(n_jobs),
+    ] + additional_args
+    print("Running with args: %s" % args)
+    subprocess.check_call(args)
+
+    result = Class1AffinityPredictor.load(
+        models_dir_selected, optimization_level=0)
+    assert_equal(len(result.neural_networks), 2)
+    predictions = result.predict(peptides=["SLYNTVATL"],
         alleles=["HLA-A*02:01"])
     assert_equal(predictions.shape, (1,))
     assert_array_less(predictions, 1000)
-    df = result.predict_to_dataframe(
-            peptides=["SLYNTVATL"],
-            alleles=["HLA-A*02:01"])
-    print(df)
 
     if delete:
         print("Deleting: %s" % models_dir)
         shutil.rmtree(models_dir)
+        shutil.rmtree(models_dir_selected)
 
 
 if os.environ.get("KERAS_BACKEND") != "theano":