diff --git a/README.md b/README.md
index a4373e2731df3d5308211f075d4dc782e24741fa..e47650dd0155e75e90d0a0f505843c89ec6dfc9e 100644
--- a/README.md
+++ b/README.md
@@ -138,6 +138,7 @@ mhctools \
     --mhc-predictor mhcflurry \
     --input-fasta-file INPUT.fasta \
     --mhc-alleles A02:01,A03:01 \
+    --mhc-peptide-lengths 8,9,10,11 \
     --extract-subsequences \
     --out RESULT.csv
 ```
diff --git a/downloads-generation/cross_validation_class1/GENERATE.sh b/downloads-generation/cross_validation_class1/GENERATE.sh
index 658460f6f43ba4cd0a132710f44c8fa240b6d450..c35e0862a0b97ddfa20e65e6117c8cdf8917d8da 100755
--- a/downloads-generation/cross_validation_class1/GENERATE.sh
+++ b/downloads-generation/cross_validation_class1/GENERATE.sh
@@ -1,5 +1,10 @@
 #!/bin/bash
-
+#
+# Cross validation using the standard class I models.
+# Splits training data into 5 folds (stratifying on allele), trains and tests a
+# predictor on each (train, test) fold, and writes a summary CSV giving
+# performance for each allele on each fold.
+#
 set -e
 set -x
 
diff --git a/downloads-generation/cross_validation_class1/score.py b/downloads-generation/cross_validation_class1/score.py
index dcf366de2f9e4faf2a8fe885d8baa60b85b730bb..7af791c4ccf2bc6e7d4c77cda4d30999d7d40a2a 100644
--- a/downloads-generation/cross_validation_class1/score.py
+++ b/downloads-generation/cross_validation_class1/score.py
@@ -75,6 +75,8 @@ def run(argv):
         scores['kind'] = "single" if "single" in prediction_col else "ensemble"
         scores['train_size'] = allele_df[prediction_col].isnull().sum()
         scores['test_size'] = len(sub_df)
+
+        # make_scores returns a dict with entries "auc", "f1", "tau"
         scores.update(
             make_scores(
                 sub_df.measurement_value, sub_df[prediction_col]))
diff --git a/downloads-generation/cross_validation_class1/split_folds.py b/downloads-generation/cross_validation_class1/split_folds.py
index 3f95337ffeb9dfeedcaa301fc9c8ecd58125b7c7..dd49085fd6818a9a751419edce1aec38cb2f2eaf 100644
--- a/downloads-generation/cross_validation_class1/split_folds.py
+++ b/downloads-generation/cross_validation_class1/split_folds.py
@@ -6,6 +6,7 @@ import sys
 from os.path import abspath
 
 import pandas
+import numpy
 from sklearn.model_selection import StratifiedKFold
 
 parser = argparse.ArgumentParser(usage = __doc__)
@@ -68,18 +69,25 @@ def run(argv):
     else:
         alleles = list(
             allele_counts.ix[
-            allele_counts > args.min_measurements_per_allele
+                allele_counts > args.min_measurements_per_allele
             ].index)
 
-    df = df.ix[df.allele.isin(alleles)]
+    df = df.loc[df.allele.isin(alleles)].copy()
     print("Potentially subselected by allele to: %s" % str(df.shape))
 
     print("Data has %d alleles: %s" % (
         df.allele.nunique(), " ".join(df.allele.unique())))
+    print(df.head())
+
+    # Take log before taking median (in case of even number of samples).
+    df["measurement_value"] = numpy.log1p(df.measurement_value)
 
     df = df.groupby(["allele", "peptide"]).measurement_value.median().reset_index()
+    df["measurement_value"] = numpy.expm1(df.measurement_value)
     print("Took median for each duplicate peptide/allele pair: %s" % str(df.shape))
+    print(df.head())
+
     if args.subsample:
         df = df.head(args.subsample)
         print("Subsampled to: %s" % str(df.shape))
 
diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh
index 89982e6a9c0b1f7ea598912270fd428f9a5019c6..f0a3d54aece76755ab92f1a6764243f014f112b4 100755
--- a/downloads-generation/data_curated/GENERATE.sh
+++ b/downloads-generation/data_curated/GENERATE.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
-
+#
+# Create "curated" training data, which combines an IEDB download with additional
+# published data, removes unusable entries, normalizes allele names, and performs
+# other filtering and standardization.
+#
 set -e
 set -x
 
diff --git a/downloads-generation/data_iedb/GENERATE.sh b/downloads-generation/data_iedb/GENERATE.sh
index 55156647d17eb21d90e3b136519f151bb3bd5cd9..a6067a36eb10c18ad1b26a17edb38f656dd106ae 100755
--- a/downloads-generation/data_iedb/GENERATE.sh
+++ b/downloads-generation/data_iedb/GENERATE.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
-
+#
+# Download latest MHC I ligand data from IEDB.
+#
 set -e
 set -x
 
diff --git a/downloads-generation/data_kim2014/GENERATE.sh b/downloads-generation/data_kim2014/GENERATE.sh
index baf8a3a453804e47296fd0dcc7aff85b9118ae7e..dbda0fe8c12df076e5e8f3b96728eefda51f23c6 100755
--- a/downloads-generation/data_kim2014/GENERATE.sh
+++ b/downloads-generation/data_kim2014/GENERATE.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
-
+#
+# Download some published MHC I ligand data from a location on Dropbox.
+#
+#
 set -e
 set -x
 
diff --git a/downloads-generation/models_class1/GENERATE.sh b/downloads-generation/models_class1/GENERATE.sh
index 2e509d2385196e8b95ff831425d3b52d3aa09814..b72334b536cca453300b52257eb8e9c65c7e0dd3 100755
--- a/downloads-generation/models_class1/GENERATE.sh
+++ b/downloads-generation/models_class1/GENERATE.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
-
+#
+# Train standard MHCflurry Class I models.
+# Calls mhcflurry-class1-train-allele-specific-models on curated training data
+# using the hyperparameters in "hyperparameters.yaml".
+#
 set -e
 set -x
 
diff --git a/downloads-generation/models_class1_experiments1/GENERATE.sh b/downloads-generation/models_class1_experiments1/GENERATE.sh
index 6edd43488d2331a8b7d3b3a875b35942d2fe856c..89921d924e819b8ac41c6e274730182664013e22 100755
--- a/downloads-generation/models_class1_experiments1/GENERATE.sh
+++ b/downloads-generation/models_class1_experiments1/GENERATE.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
-
+#
+# Train "experimental" models using various hyperparameter combinations.
+# This trains models only for a small number of alleles for which we have good
+# mass-spec validation data.
+#
 set -e
 set -x
 
diff --git a/mhcflurry/amino_acid.py b/mhcflurry/amino_acid.py
index 78b3a854d8ae08624a5ecd47b28ad0d766eb9a32..ffa5cffc2ae71f1e15f3069244a577399e6bfa20 100644
--- a/mhcflurry/amino_acid.py
+++ b/mhcflurry/amino_acid.py
@@ -80,7 +80,7 @@ X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 """), sep='\s+').loc[AMINO_ACIDS, AMINO_ACIDS]
 assert (BLOSUM62_MATRIX == BLOSUM62_MATRIX.T).all().all()
 
-ENCODING_DFS = {
+ENCODING_DATA_FRAMES = {
     "BLOSUM62": BLOSUM62_MATRIX,
     "one-hot": pandas.DataFrame([
         [1 if i == j else 0 for i in range(len(AMINO_ACIDS))]
@@ -98,7 +98,7 @@ def available_vector_encodings():
     list of string
 
     """
-    return list(ENCODING_DFS)
+    return list(ENCODING_DATA_FRAMES)
 
 
 def vector_encoding_length(name):
@@ -113,7 +113,7 @@ def vector_encoding_length(name):
     -------
     int
     """
-    return ENCODING_DFS[name].shape[1]
+    return ENCODING_DATA_FRAMES[name].shape[1]
 
 
 def index_encoding(sequences, letter_to_index_dict):
diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
index ada2e9e54cbf875a019db00f56c92cc2506e78bf..af94cfa05e167d49327a6f9cb5170952a909895d 100644
--- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
@@ -760,6 +760,19 @@ class Class1AffinityPredictor(object):
             'peptide': peptides.sequences,
             'allele': alleles,
         })
+        if len(df) == 0:
+            # No predictions.
+            logging.warning("Predicting for 0 peptides.")
+            empty_result = pandas.DataFrame(
+                columns=[
+                    'peptide',
+                    'allele',
+                    'prediction',
+                    'prediction_low',
+                    'prediction_high'
+                ])
+            return empty_result
+
         df["normalized_allele"] = df.allele.map(
             mhcnames.normalize_allele_name)
 
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index b98fb862ff4a4c3ffed2dac6d3e4ffef4d8471db..8477938b12ffa19b175b89e05de60b2806fafc24 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -135,7 +135,7 @@ class EncodableSequences(object):
                     max_length=max_length))
             result = amino_acid.fixed_vectors_encoding(
                 fixed_length_sequences,
-                amino_acid.ENCODING_DFS[vector_encoding_name])
+                amino_acid.ENCODING_DATA_FRAMES[vector_encoding_name])
             assert result.shape[0] == len(self.sequences)
             self.encoding_cache[cache_key] = result
         return self.encoding_cache[cache_key]
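A note on the cross-validation setup described in `downloads-generation/cross_validation_class1/GENERATE.sh`: the 5-fold, allele-stratified split is what the `StratifiedKFold` import in `split_folds.py` supports. The sketch below shows that kind of split on a made-up stand-in for the curated training data; the column names follow the script, but the allele names and values are illustrative only.

```python
import pandas
from sklearn.model_selection import StratifiedKFold

# Toy stand-in for the curated training data: one row per measurement.
# Allele names, peptides, and values here are made up for illustration.
df = pandas.DataFrame({
    "allele": ["HLA-A0201"] * 10 + ["HLA-B0702"] * 10,
    "peptide": ["PEPTIDE%02d" % i for i in range(20)],
    "measurement_value": [100.0 + i for i in range(20)],
})

# Stratifying on allele keeps every allele represented in each (train, test) fold.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold_num, (train_idx, test_idx) in enumerate(kfold.split(df, df.allele)):
    train, test = df.iloc[train_idx], df.iloc[test_idx]
    print(fold_num, train.allele.value_counts().to_dict(), len(test))
```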
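The `# Take log before taking median` comment in the `split_folds.py` hunk matters only for groups with an even number of measurements, where the median is the mean of the two middle values; taking that mean in log space gives a geometric-mean-style summary rather than an arithmetic one. A minimal self-contained sketch with hypothetical toy affinities:

```python
import numpy
import pandas

# Two hypothetical duplicate measurements (nM) for one allele/peptide pair.
df = pandas.DataFrame({
    "allele": ["HLA-A0201", "HLA-A0201"],
    "peptide": ["SIINFEKL", "SIINFEKL"],
    "measurement_value": [10.0, 1000.0],
})

# Median in linear space averages the two middle values: (10 + 1000) / 2 = 505.
linear_median = df.groupby(["allele", "peptide"]).measurement_value.median()

# Median in log space, inverted afterwards, behaves like a geometric mean:
# expm1((log1p(10) + log1p(1000)) / 2) is roughly 103.9.
df["measurement_value"] = numpy.log1p(df.measurement_value)
log_median = numpy.expm1(
    df.groupby(["allele", "peptide"]).measurement_value.median())

print(linear_median.iloc[0])  # 505.0
print(log_median.iloc[0])     # ~103.9
```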
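The `Class1AffinityPredictor` hunk returns early with an explicitly-columned empty DataFrame when zero peptides are passed. Listing the columns means callers can keep selecting and concatenating without special-casing the empty result; the sketch below illustrates this with plain pandas (only the column names are taken from the diff, everything else is illustrative).

```python
import pandas

RESULT_COLUMNS = [
    "peptide", "allele", "prediction", "prediction_low", "prediction_high"]

# An empty frame built without a column list has no schema; the guarded
# return in the diff instead carries the full set of expected columns.
bare = pandas.DataFrame()
empty_result = pandas.DataFrame(columns=RESULT_COLUMNS)

print(list(bare.columns))          # []
print(list(empty_result.columns))  # the five expected columns

# Downstream code can still select and concatenate without special cases.
print(empty_result[["peptide", "prediction"]].shape)      # (0, 2)
print(pandas.concat([empty_result, empty_result]).shape)  # (0, 5)
```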