diff --git a/downloads-generation/models_class1/README.md b/downloads-generation/models_class1/README.md
index 604852a62546e4c30b49cd5fa544a6dae50feed2..ce784b40a1531cf4bcdec29cfcb9f8d32fb6039a 100644
--- a/downloads-generation/models_class1/README.md
+++ b/downloads-generation/models_class1/README.md
@@ -1,29 +1,9 @@
 # Class I allele-specific models (ensemble)
 
-This download contains trained MHC Class I allele-specific MHCflurry models. For each allele, an ensemble of predictors is trained on random halves of the training data. Model architectures are selected based on performance on the other half of the dataset, so in general each ensemble contains predictors of different architectures. At prediction time the geometric mean IC50 is taken over the trained models. The training data used is in the [data_combined_iedb_kim2014](../data_combined_iedb_kim2014) MHCflurry download.
+This download contains trained MHC Class I MHCflurry models.
 
-The training script supports multi-node parallel execution using the [kubeface](https://github.com/hammerlab/kubeface) library.
+To generate this download, run:
 
-To use kubeface, you should make a google storage bucket and pass it below with the --storage-prefix argument. 
-
-To generate this download we run:
-
-```
-./GENERATE.sh \
-    --parallel-backend kubeface \
-    --target-tasks 200 \
-    --kubeface-backend kubernetes \
-    --kubeface-storage gs://kubeface-tim \
-    --kubeface-worker-image hammerlab/mhcflurry-misc:latest \
-    --kubeface-kubernetes-task-resources-memory-mb 10000 \
-    --kubeface-worker-path-prefix venv-py3/bin \
-    --kubeface-max-simultaneous-tasks 200 \
-    --kubeface-speculation-max-reruns 3 \
-```
-
-To debug locally:
-```
-./GENERATE.sh \
-    --parallel-backend local-threads \
-    --target-tasks 1
 ```
+./GENERATE.sh
+```
\ No newline at end of file
diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
index 63c846d20f94637df336be24aa46841e5f2f790b..fdc4bc2dca404f6b9ce38e9393b9100bb3669a2a 100644
--- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
@@ -47,7 +47,8 @@ class Class1AffinityPredictor(object):
         manifest_df : pandas.DataFrame, optional
             Must have columns: model_name, allele, config_json, model.
             Only required if you want to update an existing serialization of a
-            Class1AffinityPredictor.
+            Class1AffinityPredictor. Otherwise this dataframe will be generated
+            automatically based on the supplied models.
         """
 
         if allele_to_allele_specific_models is None:
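
As a side note, here is a minimal sketch of constructing a predictor this way. The `trained_networks` list is a hypothetical stand-in for already-trained Class1NeuralNetwork instances; only the two parameters shown are taken from this hunk:

```
from mhcflurry.class1_affinity_prediction import Class1AffinityPredictor

# Hypothetical stand-in for a list of trained Class1NeuralNetwork
# instances covering a single allele.
trained_networks = []

# manifest_df is omitted, so per the docstring it is generated
# automatically from the supplied models.
predictor = Class1AffinityPredictor(
    allele_to_allele_specific_models={"HLA-A0201": trained_networks})
```
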
@@ -91,6 +92,12 @@ class Class1AffinityPredictor(object):
         """
         Serialize the predictor to a directory on disk.
         
+        The serialization format consists of a file called "manifest.csv"
+        giving the configuration of each Class1NeuralNetwork, along with
+        per-network files storing the model weights. If there are pan-allele
+        predictors in the ensemble, the allele pseudosequences are also
+        stored in the directory.
+
         Parameters
         ----------
         models_dir : string
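
Continuing the sketch above, a save/load round trip under this layout. `Class1AffinityPredictor.load(models_dir)` is the counterpart predict_command uses later in this diff; the save() call assumes only the models_dir parameter documented here:

```
import tempfile

models_dir = tempfile.mkdtemp()

# Writes manifest.csv plus one weights file per network (and the
# allele pseudosequences, if the ensemble has pan-allele predictors).
predictor.save(models_dir)

# Restore the ensemble from the same directory.
reloaded = Class1AffinityPredictor.load(models_dir)
```
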
diff --git a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
index fd62529fd37ab56438fefac68b6d00caeada479d..212e8918760249e80538bbc02bb9181b9701e68a 100644
--- a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
+++ b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
@@ -1,5 +1,5 @@
 """
-Train single allele models
+Train Class1 single allele models.
 
 """
 import sys
@@ -8,7 +8,6 @@ import json
 
 import pandas
 
-
 from .class1_affinity_predictor import Class1AffinityPredictor
 from ..common import configure_logging
 
@@ -17,14 +16,19 @@ parser = argparse.ArgumentParser(usage=__doc__)
 
 parser.add_argument(
     "--data",
+    metavar="FILE.csv",
     required=True,
-    help="Training data")
+    help=(
+        "Training data CSV. Expected columns: "
+        "allele, peptide, measurement_value"))
 parser.add_argument(
     "--out-models-dir",
+    metavar="DIR",
     required=True,
     help="Directory to write models and manifest")
 parser.add_argument(
     "--hyperparameters",
+    metavar="FILE.json",
     required=True,
     help="JSON of hyperparameters")
 parser.add_argument(
@@ -39,15 +43,10 @@ parser.add_argument(
     metavar="N",
     default=50,
     help="Train models for alleles with >=N measurements.")
-parser.add_argument(
-    "--only-quantitative",
-    action="store_true",
-    help="Exclude qualitative measurements",
-    default=False)
 parser.add_argument(
     "--verbosity",
     type=int,
-    help="Default: %(default)s",
+    help="Keras verbosity. Default: %(default)s",
     default=1)
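
To make the expected --data format concrete, here is a toy training CSV written with pandas. The values are illustrative only, and the command name in the trailing comment is a placeholder, not taken from this patch:

```
import pandas

# Toy training data with the three columns named in the --data help
# text; measurement_value holds affinities in nM.
pandas.DataFrame({
    "allele": ["HLA-A0201", "HLA-A0201", "HLA-B0702"],
    "peptide": ["SIINFEKL", "SLYNTVATL", "APRGPHGGA"],
    "measurement_value": [400.0, 32.0, 11000.0],
}).to_csv("train.csv", index=False)

# A run of this command would then look something like:
#   <train-entry-point> \
#       --data train.csv \
#       --hyperparameters hyperparameters.json \
#       --out-models-dir models/
```
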
 
 
diff --git a/mhcflurry/common.py b/mhcflurry/common.py
index 42aced01d520ecac3c05a4bdcb111107c0eb2c97..c34f19695f4642590702c4a8129eaefa2179ac37 100644
--- a/mhcflurry/common.py
+++ b/mhcflurry/common.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from __future__ import print_function, division, absolute_import
-from math import exp, log
 import itertools
 import collections
 import logging
@@ -28,73 +27,6 @@ import pandas
 from . import amino_acid
 
 
-class UnsupportedAllele(Exception):
-    pass
-
-
-def parse_int_list(s):
-    return [int(part.strip()) for part in s.split(",")]
-
-
-def split_uppercase_sequences(s):
-    return [part.strip().upper() for part in s.split(",")]
-
-
-MHC_PREFIXES = [
-    "HLA",
-    "H-2",
-    "Mamu",
-    "Patr",
-    "Gogo",
-    "ELA",
-]
-
-
-def normalize_allele_name(allele_name, default_prefix="HLA"):
-    """
-    Only works for a small number of species.
-
-    TODO: use the same logic as mhctools for MHC name parsing.
-    Possibly even worth its own small repo called something like "mhcnames"
-    """
-    allele_name = allele_name.upper()
-    # old school HLA-C serotypes look like "Cw"
-    allele_name = allele_name.replace("CW", "C")
-
-    prefix = default_prefix
-    for candidate in MHC_PREFIXES:
-        if (allele_name.startswith(candidate.upper()) or
-                allele_name.startswith(candidate.replace("-", "").upper())):
-            prefix = candidate
-            allele_name = allele_name[len(prefix):]
-            break
-    for pattern in MHC_PREFIXES + ["-", "*", ":"]:
-        allele_name = allele_name.replace(pattern, "")
-    return "%s%s" % (prefix + "-" if prefix else "", allele_name)
-
-
-def split_allele_names(s):
-    return [
-        normalize_allele_name(part.strip())
-        for part
-        in s.split(",")
-    ]
-
-
-def geometric_mean(xs, weights=None):
-    """
-    Geometric mean of a collection of affinity measurements, with optional
-    sample weights.
-    """
-    if len(xs) == 1:
-        return xs[0]
-    elif weights is None:
-        return exp(sum(log(xi) for xi in xs) / len(xs))
-    sum_weighted_log = sum(log(xi) * wi for (xi, wi) in zip(xs, weights))
-    denom = sum(weights)
-    return exp(sum_weighted_log / denom)
-
-
 def all_combinations(**dict_of_lists):
     """
     Iterator that generates all combinations of parameters given in the
@@ -107,36 +39,6 @@ def all_combinations(**dict_of_lists):
         yield dict(zip(arg_names, combination_of_values))
 
 
-def groupby_indices(iterable, key_fn=lambda x: x):
-    """
-    Returns dictionary mapping unique values to list of indices that
-    had those values.
-    """
-    index_groups = collections.defaultdict(list)
-    for i, x in enumerate(key_fn(x) for x in iterable):
-        index_groups[x].append(i)
-    return index_groups
-
-
-def shuffle_split_list(indices, fraction=0.5):
-    """
-    Split a list of indices into two sub-lists, with an optional parameter
-    controlling what fraction of the indices go to the left list.
-    """
-    indices = np.asarray(indices)
-    np.random.shuffle(indices)
-    n = len(indices)
-
-    left_count = int(np.ceil(fraction * n))
-
-    if n > 1 and left_count == 0:
-        left_count = 1
-    elif n > 1 and left_count == n:
-        left_count = n - 1
-
-    return indices[:left_count], indices[left_count:]
-
-
 def dataframe_cryptographic_hash(df):
     """
     Return a cryptographic (i.e. collisions extremely unlikely) hash
@@ -244,18 +146,21 @@ def drop_nulls_and_warn(df, related_df_with_same_index_to_describe=None):
     return new_df
 
 
-def from_ic50(ic50):
-    x = 1.0 - (numpy.log(ic50) / numpy.log(50000))
-    return numpy.minimum(
-        1.0,
-        numpy.maximum(0.0, x))
-
-
-def to_ic50(x):
-    return 50000.0 ** (1.0 - x)
-
-
 def amino_acid_distribution(peptides, smoothing=0.0):
+    """
+    Compute the fraction of each amino acid across a collection of peptides.
+
+    Parameters
+    ----------
+    peptides : list of string
+    smoothing : float, optional
+        Small number (e.g. 0.01) to add to all amino acid fractions. The
+        higher the number, the more uniform the distribution.
+
+    Returns
+    -------
+    pandas.Series indexed by amino acids
+    """
     peptides = pandas.Series(peptides)
     aa_counts = pandas.Series(peptides.map(collections.Counter).sum())
     normalized = aa_counts / aa_counts.sum()
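
A quick usage sketch of the function documented above. The exact handling of the smoothing term falls outside this hunk, so the comments stick to what the new docstring states:

```
from mhcflurry.common import amino_acid_distribution

peptides = ["SIINFEKL", "SIINFEKD", "SIINFEKV"]

# Raw amino acid fractions across the peptides.
dist = amino_acid_distribution(peptides)

# Per the docstring, smoothing adds a small value to all fractions,
# pulling the distribution toward uniform.
smoothed = amino_acid_distribution(peptides, smoothing=0.01)
print(smoothed.sort_values(ascending=False))
```
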
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index 86e38a1acf22d6984bc20ec595f76c2548e57919..4ffaeb0f5cc7bec6dea4ad5343b106500e53e40e 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -252,9 +252,14 @@ class EncodableSequences(object):
         string of length max_length
 
         """
-        assert len(klass.unknown_character) == 1
-        assert len(sequence) >= left_edge + right_edge, sequence
-        assert len(sequence) <= max_length, sequence
+        if len(sequence) < left_edge + right_edge:
+            raise ValueError(
+                "Sequence '%s' (length %d) unsupported: length must be at "
+                "least %d" % (sequence, len(sequence), left_edge + right_edge))
+        if len(sequence) > max_length:
+            raise ValueError(
+                "Sequence '%s' (length %d) unsupported: length must be at "
+                "most %d" % (sequence, len(sequence), max_length))
 
         middle_length = max_length - left_edge - right_edge
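
To make the length bounds concrete: the method keeps left_edge residues on the left and right_edge on the right, and fills the remaining middle_length positions. A standalone sketch follows; centering the middle segment and padding with "X" are assumptions about the encoding scheme, not code from this hunk:

```
def to_fixed_length(sequence, left_edge=4, right_edge=4, max_length=15,
                    unknown="X"):
    # Same length validation as the method above.
    if len(sequence) < left_edge + right_edge:
        raise ValueError(
            "length must be at least %d" % (left_edge + right_edge))
    if len(sequence) > max_length:
        raise ValueError("length must be at most %d" % max_length)

    middle_length = max_length - left_edge - right_edge
    middle = sequence[left_edge:len(sequence) - right_edge]
    pad = middle_length - len(middle)
    # How the pad characters split between the two sides is a guess.
    return (sequence[:left_edge]
            + unknown * (pad // 2) + middle + unknown * (pad - pad // 2)
            + sequence[-right_edge:])

print(to_fixed_length("SIINFEKL"))  # -> SIINXXXXXXXFEKL (length 15)
```
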
 
diff --git a/mhcflurry/predict_command.py b/mhcflurry/predict_command.py
index 58c37c0c21bd48306013d6108ed23922495c888e..a4603ad56ec01d148d4ebbb6754bd242c5d8ae70 100644
--- a/mhcflurry/predict_command.py
+++ b/mhcflurry/predict_command.py
@@ -41,10 +41,10 @@ from __future__ import (
 )
 import sys
 import argparse
-import logging
-import pandas
 import itertools
 
+import pandas
+
 from .downloads import get_path
 from .class1_affinity_prediction import Class1AffinityPredictor
 
@@ -99,7 +99,7 @@ parser.add_argument(
     metavar="DIR",
     default=None,
     help="Directory containing models. "
-    "Default: %s" % get_path("models_class1", test_exists=False))
+    "Default: %s" % get_path("models_class1", "models", test_exists=False))
 
 
 def run(argv=sys.argv[1:]):
@@ -148,24 +148,11 @@ def run(argv=sys.argv[1:]):
         # The reason we set the default here instead of in the argument parser is that
         # we want to test_exists at this point, so the user gets a message instructing
         # them to download the models if needed.
-        models_dir = get_path("models_class1")
+        models_dir = get_path("models_class1", "models")
     predictor = Class1AffinityPredictor.load(models_dir)
-
-    predictions = {}  # allele -> peptide -> value
-    for (allele, sub_df) in df.groupby(args.allele_column):
-        logging.info("Running %d predictions for allele %s" % (
-            len(sub_df), allele))
-        peptides = sub_df[args.peptide_column].values
-        predictions[allele] = dict(
-            (peptide, prediction)
-            for (peptide, prediction)
-            in zip(peptides, predictor.predict_for_allele(allele, peptides)))
-
-    logging.info("Collecting result")
-    df[args.prediction_column] = [
-        predictions[row[args.allele_column]][row[args.peptide_column]]
-        for (_, row) in df.iterrows()
-    ]
+    df[args.prediction_column] = predictor.predict(
+        peptides=df[args.peptide_column].values,
+        alleles=df[args.allele_column].values)
 
     if args.out:
         df.to_csv(args.out, index=False)
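
The replacement code path in one self-contained snippet; both the get_path arguments and the predict() keyword arguments are taken directly from this diff:

```
from mhcflurry.downloads import get_path
from mhcflurry.class1_affinity_prediction import Class1AffinityPredictor

# Load the downloaded model ensemble, as run() does above.
models_dir = get_path("models_class1", "models")
predictor = Class1AffinityPredictor.load(models_dir)

# predict() takes parallel peptide and allele arrays and returns one
# affinity per row, replacing the removed per-allele groupby loop.
affinities = predictor.predict(
    peptides=["SIINFEKL", "DENDREKLLL"],
    alleles=["HLA-A0201", "H-2-Kb"])
```
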
diff --git a/mhcflurry/scoring.py b/mhcflurry/scoring.py
index ad9046893f124053bd5ba2236c3b2464e766cbdc..fc2a926334cec48b176abc5ed51a379212ccec88 100644
--- a/mhcflurry/scoring.py
+++ b/mhcflurry/scoring.py
@@ -8,7 +8,7 @@ import sklearn
 import numpy
 import scipy
 
-from mhcflurry.regression_target import ic50_to_regression_target
+from .regression_target import from_ic50
 
 
 def make_scores(
@@ -39,7 +39,7 @@ def make_scores(
     dict with entries "auc", "f1", "tau"
     """
 
-    y_pred = ic50_to_regression_target(ic50_y_pred, max_ic50)
+    y_pred = from_ic50(ic50_y_pred, max_ic50)
     try:
         auc = sklearn.metrics.roc_auc_score(
             ic50_y <= threshold_nm,
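
For reference, the transform behind this call, based on the version removed from common.py earlier in this diff (which hardcoded max_ic50 = 50000). The max_ic50 parameter matches how scoring.py now calls from_ic50, though the body of regression_target.from_ic50 itself is not shown in this patch:

```
import numpy

def from_ic50(ic50, max_ic50=50000.0):
    # Map IC50 (nM) to [0, 1]; tight binders (low IC50) approach 1.
    x = 1.0 - numpy.log(ic50) / numpy.log(max_ic50)
    return numpy.minimum(1.0, numpy.maximum(0.0, x))

print(from_ic50(numpy.array([50.0, 500.0, 50000.0])))
```
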
diff --git a/test/test_predict_command.py b/test/test_predict_command.py
index 7cb64d27b753b2675f632006e0b46011d52263e8..7229b814689a75371ae1e2c8a285c71bcdf5f146 100644
--- a/test/test_predict_command.py
+++ b/test/test_predict_command.py
@@ -37,8 +37,8 @@ def test_csv():
 
 def test_no_csv():
     args = [
-        "--alleles", "HLA-A0201", "H-2Kb",
-        "--peptides", "SIINFEKL", "DENDREKLLL", "PICKLE",
+        "--alleles", "HLA-A0201", "H-2-Kb",
+        "--peptides", "SIINFEKL", "DENDREKLLL", "PICKLEEE",
         "--prediction-column", "prediction",
     ]
 
@@ -58,5 +58,5 @@ def test_no_csv():
     assert_equal(result.shape, (6, 3))
     sub_result1 = result.ix[result.peptide == "SIINFEKL"].set_index("allele")
     assert (
-        sub_result1.ix["H-2Kb"].prediction <
+        sub_result1.ix["H-2-Kb"].prediction <
         sub_result1.ix["HLA-A0201"].prediction)