diff --git a/.travis.yml b/.travis.yml
index 079d7e82b72b642a597589aa7c7aa589b2d8a24a..159fd2d88ca6c63d76aae7ffcc9e0f1ef603f73b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,8 @@ install:
   - pip install coveralls
 env:
   global:
-    - PYTHONHASHSEED=0 
+    - PYTHONHASHSEED=0
+    - MKL_THREADING_LAYER=GNU  # for theano
   matrix:
     - KERAS_BACKEND=theano
     - KERAS_BACKEND=tensorflow
diff --git a/README.md b/README.md
index ae8b7355a0203fc302ff71e11adef6b051cc3bc6..cc0c93af37fac147f6a30cb630d712703906a778 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master)](https://travis-ci.org/hammerlab/mhcflurry) [![Coverage Status](https://coveralls.io/repos/github/hammerlab/mhcflurry/badge.svg?branch=master)](https://coveralls.io/github/hammerlab/mhcflurry?branch=master)
+[![Build Status](https://travis-ci.org/openvax/mhcflurry.svg?branch=master)](https://travis-ci.org/openvax/mhcflurry)
 
 # mhcflurry
 [MHC I](https://en.wikipedia.org/wiki/MHC_class_I) ligand
@@ -34,9 +34,9 @@ You can now generate predictions:
 $ mhcflurry-predict \
        --alleles HLA-A0201 HLA-A0301 \
        --peptides SIINFEKL SIINFEKD SIINFEKQ \
-       --out /tmp/predictions.csv \
+       --out /tmp/predictions.csv
        
 Wrote: /tmp/predictions.csv
 ```
 
-See the [documentation](http://openvax.github.io/mhcflurry/) for more details.
+See the [documentation](http://openvax.github.io/mhcflurry/) for more details.
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
index 3dfdf85d46949236d6a6fcd33a897ee201828804..8c1bf9d5c429ffa198fed21a652998f6f5f528e5 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -56,7 +56,6 @@ generate:
 	mhcflurry-downloads fetch models_class1 cross_validation_class1
 	python generate.py \
 	    --out-models-cv-rst _build/_models_cv.rst \
-	    --out-models-architecture-png _build/_models_architecture.png \
 	    --out-models-info-rst _build/_models_info.rst \
 	    --out-models-supported-alleles-rst _build/_models_supported_alleles.rst
 
diff --git a/docs/commandline_tutorial.rst b/docs/commandline_tutorial.rst
index 6dc803c81b40657c2be83f8d5106a46654fb1c73..ade306d6f4a781df3080cad53547a5e6eee37f29 100644
--- a/docs/commandline_tutorial.rst
+++ b/docs/commandline_tutorial.rst
@@ -111,7 +111,7 @@ training data. The data we use for our released predictors can be downloaded wit
 It looks like this:
 
 .. command-output::
-    bzcat "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" | head -n 3
+    bzcat "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" | head -n 3
     :shell:
     :nostderr:
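
For readers who prefer Python over `bzcat`, a minimal pandas equivalent of the peek above. The filename matches the one used in the tutorial; the directory is a placeholder for whatever `mhcflurry-downloads path data_curated` resolves to on your machine:

```python
# Hedged sketch: inspect the curated training data from Python.
import pandas

path = "/path/to/data_curated/curated_training_data.no_mass_spec.csv.bz2"
df = pandas.read_csv(path)  # pandas decompresses .bz2 transparently
print(df.head(3))
```
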
 
diff --git a/docs/conf.py b/docs/conf.py
index be1d9ae6aa14dab0664790112c8df8957fdd7682..98df4244ca721115c27b62f72aea8c2bf9f6613a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -86,7 +86,7 @@ author = 'Timothy O\'Donnell'
 
 # The short X.Y version.
 # Added by Tim: reading version from mhcflurry __init__.py as in setup.py
-with open('../mhcflurry/__init__.py', 'r') as f:
+with open('../mhcflurry/version.py', 'r') as f:
     version = re.search(
         r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
         f.read(),
diff --git a/docs/generate.py b/docs/generate.py
index 848d04ce5a15c0d8391a4c1bd81e90b4a5229939..fb39ade21ba133c8775cf6a6d5c06bbdd9db3b60 100644
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -4,11 +4,14 @@ Generate certain RST files used in documentation.
 
 import sys
 import argparse
+import json
 from textwrap import wrap
+from collections import OrderedDict
 
 import pypandoc
 import pandas
 from keras.utils.vis_utils import plot_model
+from tabulate import tabulate
 
 from mhcflurry import __version__
 from mhcflurry.downloads import get_path
@@ -89,18 +92,66 @@ def go(argv):
         # Architecture information rst
         if predictor is None:
             predictor = Class1AffinityPredictor.load(args.class1_models_dir)
-        network = predictor.neural_networks[0].network()
-        lines = []
-        network.summary(print_fn=lines.append)
+
+        representative_networks = OrderedDict()
+        for network in predictor.neural_networks:
+            config = json.dumps(network.hyperparameters)
+            if config not in representative_networks:
+                representative_networks[config] = network
+
+        all_hyperparameters = [
+            network.hyperparameters for network in representative_networks.values()
+        ]
+        hyperparameter_keys = all_hyperparameters[0].keys()
+        assert all(
+            hyperparameters.keys() == hyperparameter_keys
+            for hyperparameters in all_hyperparameters)
+
+        constant_hyperparameter_keys = [
+            k for k in hyperparameter_keys
+            if all([
+                hyperparameters[k] == all_hyperparameters[0][k]
+                for hyperparameters in all_hyperparameters
+            ])
+        ]
+        constant_hyperparameters = dict(
+            (key, all_hyperparameters[0][key])
+            for key in sorted(constant_hyperparameter_keys)
+        )
+
+        def write_hyperparameters(fd, hyperparameters):
+            rows = []
+            for key in sorted(hyperparameters.keys()):
+                rows.append((key, json.dumps(hyperparameters[key])))
+            fd.write("\n")
+            fd.write(
+                tabulate(rows, ["Hyperparameter", "Value"], tablefmt="grid"))
 
         with open(args.out_models_info_rst, "w") as fd:
-            fd.write("Layers and parameters summary: ")
-            fd.write("\n\n::\n\n")
-            for line in lines:
-                fd.write("    ")
-                fd.write(line)
+            fd.write("Hyperparameters shared by all %d architectures:\n" %
+                len(representative_networks))
+            write_hyperparameters(fd, constant_hyperparameters)
+            fd.write("\n")
+            for (i, network) in enumerate(representative_networks.values()):
+                lines = []
+                network.network().summary(print_fn=lines.append)
+
+                fd.write("Architecture %d / %d:\n" % (
+                    (i + 1, len(representative_networks))))
+                fd.write("+" * 40)
                 fd.write("\n")
-            print("Wrote: %s" % args.out_models_info_rst)
+                write_hyperparameters(
+                    fd,
+                    dict(
+                        (key, value)
+                        for (key, value) in network.hyperparameters.items()
+                        if key not in constant_hyperparameters))
+                fd.write("\n\n::\n\n")
+                for line in lines:
+                    fd.write("    ")
+                    fd.write(line)
+                    fd.write("\n")
+        print("Wrote: %s" % args.out_models_info_rst)
 
     if args.out_models_cv_rst:
         # Models cv output
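
A quick sanity check of the `tabulate` "grid" format used in the rewritten generate.py above (`tabulate` is a small third-party package, added to docs/requirements.txt in this change); the rows here are illustrative:

```python
# Renders an ASCII grid table that Sphinx/RST accepts verbatim.
from tabulate import tabulate

rows = [
    ("layer_sizes", "[16]"),
    ("loss", '"custom:mse_with_inequalities"'),
]
print(tabulate(rows, ["Hyperparameter", "Value"], tablefmt="grid"))
```
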
diff --git a/docs/models.rst b/docs/models.rst
index 4f7dee9edb93dcf686b830795aad79bf93c74bd7..96f2d1243e8975a3d6a3b31d7a88bbdf903680f8 100644
--- a/docs/models.rst
+++ b/docs/models.rst
@@ -1,35 +1,28 @@
 Details on the released models
 ===============================
 
-The released MHCflurry predictor consists of an ensemble of eight models for each
-supported allele. Each model in the ensemble was trained on a random 80% sample
-of the data for the allele, and the remaining 20% was used for early stopping.
-All models use the same architecture. The predictions are taken to be the geometric
-mean of the nM binding affinity predictions of the individual models. The script
-we run to train these models is in "downloads-generation/models_class1/GENERATE.sh"
-in the repository.
-
-Neural network architecture
+The released MHCflurry predictor consists of an ensemble of models for each
+supported allele. Each model in the ensemble was trained on a random 90% sample
+of the data for the allele, and the remaining data was used for early stopping.
+The ensemble prediction is the geometric mean of the nM binding affinities
+predicted by the individual models, considering only the models whose
+predictions fall in the middle 50% of values for each peptide. The script we
+run to train these models is in
+"downloads-generation/models_class1/GENERATE.sh" in the repository.
+
+Neural network architectures
 -------------------------------------------------------------
 
-The neural network architecture is quite simple, consisting of a locally
-connected layer, a dense layer, and a sigmoid output.
-
 .. include:: /_build/_models_info.rst
 
-Architecture diagram:
-
-.. image:: /_build/_models_architecture.png
-
-Cross validation performance
--------------------------------------------------------------
+.. Cross validation performance
+.. -------------------------------------------------------------
 
-The accuracy of the MHCflurry downloadable models was estimated using 5-fold cross
-validation on the training data. The values shown here are the mean cross validation
-scores across folds.
+.. The accuracy of the MHCflurry downloadable models was estimated using 5-fold cross
+.. validation on the training data. The values shown here are the mean cross validation
+.. scores across folds.
 
-The AUC and F1 estimates use a 500 nM cutoff for distinguishing strong-binders
-from weak- or non-binders. The Kendall Tau score gives the rank correlation
-between the predicted and measured affinities; it uses no cutoff.
+.. The AUC and F1 estimates use a 500 nM cutoff for distinguishing strong-binders
+.. from weak- or non-binders. The Kendall Tau score gives the rank correlation
+.. between the predicted and measured affinities; it uses no cutoff.
 
-.. include:: /_build/_models_cv.rst
+.. .. include:: /_build/_models_cv.rst
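
A rough sketch of the "middle 50%" geometric mean described in the paragraph above. The names here are illustrative, not the actual API; the real implementation presumably lives in `mhcflurry.ensemble_centrality`, which this change imports elsewhere:

```python
import numpy

def robust_geometric_mean(predictions_nm):
    # Geometric mean restricted to predictions between the 25th and 75th
    # percentiles, so outlier models do not dominate the ensemble value.
    logs = numpy.log(numpy.asarray(predictions_nm, dtype=float))
    low, high = numpy.percentile(logs, [25, 75])
    middle = logs[(logs >= low) & (logs <= high)]
    return numpy.exp(middle.mean())

print(robust_geometric_mean([40.0, 55.0, 60.0, 80.0, 5000.0]))  # ~64 nM; the 5000 nM outlier is ignored
```
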
diff --git a/docs/python_tutorial.rst b/docs/python_tutorial.rst
index f8a9379984b9fec2ce7a31a41ac60b29bb8c8881..5840db46484f64b430a96fb9ec86694af0328a01 100644
--- a/docs/python_tutorial.rst
+++ b/docs/python_tutorial.rst
@@ -65,7 +65,7 @@ We can get the path to this data from Python using `mhcflurry.downloads.get_path
 .. runblock:: pycon
 
     >>> from mhcflurry.downloads import get_path
-    >>> data_path = get_path("data_curated", "curated_training_data.csv.bz2")
+    >>> data_path = get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2")
     >>> data_path
 
 Now let's load it with pandas and filter to reasonably-sized peptides:
@@ -87,15 +87,16 @@ some models.
     >>> single_allele_train_data = df.loc[df.allele == "HLA-B*57:01"].sample(100)
     >>> new_predictor.fit_allele_specific_predictors(
     ...    n_models=1,
-    ...    architecture_hyperparameters={
+    ...    architecture_hyperparameters_list=[{
     ...         "layer_sizes": [16],
     ...         "max_epochs": 5,
     ...         "random_negative_constant": 5,
-    ...    },
+    ...    }],
     ...    peptides=single_allele_train_data.peptide.values,
     ...    affinities=single_allele_train_data.measurement_value.values,
     ...    allele="HLA-B*57:01")
 
+
 The `~mhcflurry.Class1AffinityPredictor.fit_allele_specific_predictors` method
 can be called any number of times on the same instance to build up ensembles
 of models across alleles. The `architecture_hyperparameters_list` we specified are
diff --git a/docs/requirements.txt b/docs/requirements.txt
index a88547504c1d7ede3af6ca6d67010fc0614ade96..af205bbbf1b5613450cffece92b58334dd06ea34 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,3 +7,4 @@ numpydoc
 pypandoc
 mhctools
 pydot
+tabulate
diff --git a/downloads-generation/cross_validation_class1/GENERATE.sh b/downloads-generation/cross_validation_class1/GENERATE.sh
index c35e0862a0b97ddfa20e65e6117c8cdf8917d8da..e525c18d3ef0969e3f159bb752df2b0b03f71e84 100755
--- a/downloads-generation/cross_validation_class1/GENERATE.sh
+++ b/downloads-generation/cross_validation_class1/GENERATE.sh
@@ -30,12 +30,13 @@ git status
 
 cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
-cp $SCRIPT_DIR/hyperparameters.yaml .
+python $SCRIPT_DIR/generate_hyperparameters.py > hyperparameters.yaml
+
 cp $SCRIPT_DIR/split_folds.py .
 cp $SCRIPT_DIR/score.py .
 
 time python split_folds.py \
-    "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" \
+    "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
     --min-measurements-per-allele 75 \
     --folds $NFOLDS \
     --random-state 1 \
@@ -52,6 +53,7 @@ do
         --hyperparameters hyperparameters.yaml \
         --out-models-dir models.fold_${fold} \
         --min-measurements-per-allele 0 \
+        --num-jobs 8 \
         --percent-rank-calibration-num-peptides-per-length 0 \
     2>&1 | tee -a LOG.train.fold_${fold}.txt &
 done
diff --git a/downloads-generation/cross_validation_class1/generate_hyperparameters.py b/downloads-generation/cross_validation_class1/generate_hyperparameters.py
new file mode 120000
index 0000000000000000000000000000000000000000..5f2599b4b2b4c418d1c296b43c55be87bb4d85ce
--- /dev/null
+++ b/downloads-generation/cross_validation_class1/generate_hyperparameters.py
@@ -0,0 +1 @@
+../models_class1/generate_hyperparameters.py
\ No newline at end of file
diff --git a/downloads-generation/cross_validation_class1/hyperparameters.yaml b/downloads-generation/cross_validation_class1/hyperparameters.yaml
deleted file mode 120000
index f32feef1682d757437fe1f0cee2a31c030d7508f..0000000000000000000000000000000000000000
--- a/downloads-generation/cross_validation_class1/hyperparameters.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../models_class1/hyperparameters.yaml
\ No newline at end of file
diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh
index f0a3d54aece76755ab92f1a6764243f014f112b4..11eb52ce42dc8a26e34e2b659e76f70559c35eca 100755
--- a/downloads-generation/data_curated/GENERATE.sh
+++ b/downloads-generation/data_curated/GENERATE.sh
@@ -30,14 +30,31 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
 cp $SCRIPT_DIR/curate.py .
 
+# No mass-spec data
 time python curate.py \
     --data-iedb \
         "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
     --data-kim2014 \
-        "$(mhcflurry-downloads path data_kim2014)/bdata.20130222.mhci.public.1.txt" \
-    --out-csv curated_training_data.csv
+        "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
+    --out-csv curated_training_data.no_mass_spec.csv
+
+# With mass-spec data
+# Note that we STILL drop mass-spec data from IEDB here, since this data seems
+# low-quality.
+time python curate.py \
+    --data-iedb \
+        "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
+    --data-kim2014 \
+        "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
+    --data-systemhc-atlas \
+        "$(mhcflurry-downloads path data_systemhcatlas)/data.csv.bz2" \
+    --data-abelin-mass-spec \
+        "$(mhcflurry-downloads path data_published)/abelin2017.hits.csv.bz2" \
+    --out-csv curated_training_data.with_mass_spec.csv
+
+bzip2 curated_training_data.no_mass_spec.csv
+bzip2 curated_training_data.with_mass_spec.csv
 
-bzip2 curated_training_data.csv
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py
index 49eb75c50cd1827c40ad49c2d38d960101c6c43c..32f4a8540ad3ab39843a97f5f1d9374a7c004786 100755
--- a/downloads-generation/data_curated/curate.py
+++ b/downloads-generation/data_curated/curate.py
@@ -4,9 +4,6 @@ Train single allele models
 """
 import sys
 import argparse
-import json
-import os
-import pickle
 
 import pandas
 
@@ -32,18 +29,41 @@ parser.add_argument(
     action="append",
     default=[],
     help="Path to IEDB-style affinity data (e.g. mhc_ligand_full.csv)")
+parser.add_argument(
+    "--data-systemhc-atlas",
+    action="append",
+    default=[],
+    help="Path to systemhc-atlas-style mass-spec data")
+parser.add_argument(
+    "--data-abelin-mass-spec",
+    action="append",
+    default=[],
+    help="Path to Abelin Immunity 2017 mass-spec hits")
+parser.add_argument(
+    "--include-iedb-mass-spec",
+    action="store_true",
+    default=False,
+    help="Include mass-spec observations in IEDB")
+
 parser.add_argument(
     "--out-csv",
     required=True,
     help="Result file")
 
-QUALITATIVE_TO_AFFINITY = {
-    "Negative": 50000.0,
-    "Positive": 100.0,
-    "Positive-High": 50.0,
-    "Positive-Intermediate": 500.0,
-    "Positive-Low": 5000.0,
+QUALITATIVE_TO_AFFINITY_AND_INEQUALITY = {
+    "Negative": (5000.0, ">"),
+    "Positive": (500.0, "<"),  # used for mass-spec hits
+    "Positive-High": (100.0, "<"),
+    "Positive-Intermediate": (1000.0, "<"),
+    "Positive-Low": (5000.0, "<"),
 }
+QUALITATIVE_TO_AFFINITY = dict(
+    (key, value[0]) for (key, value)
+    in QUALITATIVE_TO_AFFINITY_AND_INEQUALITY.items())
+QUALITATIVE_TO_INEQUALITY = dict(
+    (key, value[1]) for (key, value)
+    in QUALITATIVE_TO_AFFINITY_AND_INEQUALITY.items())
+
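
To make the mapping concrete: each qualitative IEDB call now yields both a stand-in affinity and an inequality direction. Illustrative lookups against the dicts defined above:

```python
print(QUALITATIVE_TO_AFFINITY["Positive-High"])    # 100.0 (nM)
print(QUALITATIVE_TO_INEQUALITY["Positive-High"])  # "<", i.e. true affinity is below 100 nM
print(QUALITATIVE_TO_INEQUALITY["Negative"])       # ">", i.e. true affinity is above 5000 nM
```
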
 
 EXCLUDE_IEDB_ALLELES = [
     "HLA class I",
@@ -60,6 +80,7 @@ def load_data_kim2014(filename):
         True: "quantitative",
         False: "qualitative",
     })
+    df["measurement_inequality"] = df.inequality
     df["original_allele"] = df.mhc
     df["peptide"] = df.sequence
     df["allele"] = df.mhc.map(normalize_allele_name)
@@ -71,7 +92,58 @@ def load_data_kim2014(filename):
     return df
 
 
-def load_data_iedb(iedb_csv, include_qualitative=True):
+def load_data_systemhc_atlas(filename, min_probability=0.99):
+    df = pandas.read_csv(filename)
+    print("Loaded systemhc atlas data: %s" % str(df.shape))
+
+    df["measurement_source"] = "systemhc-atlas"
+    df["measurement_value"] = QUALITATIVE_TO_AFFINITY["Positive"]
+    df["measurement_inequality"] = "<"
+    df["measurement_type"] = "qualitative"
+    df["original_allele"] = df.top_allele
+    df["peptide"] = df.search_hit
+    df["allele"] = df.top_allele.map(normalize_allele_name)
+
+    print("Dropping un-parseable alleles: %s" % ", ".join(
+        str(x) for x in df.loc[df.allele == "UNKNOWN"]["top_allele"].unique()))
+    df = df.loc[df.allele != "UNKNOWN"]
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    print("Dropping data points with probability < %f" % min_probability)
+    df = df.loc[df.prob >= min_probability]
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    print("Removing duplicates")
+    df = df.drop_duplicates(["allele", "peptide"])
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    return df
+
+
+def load_data_abelin_mass_spec(filename):
+    df = pandas.read_csv(filename)
+    print("Loaded Abelin mass-spec data: %s" % str(df.shape))
+
+    df["measurement_source"] = "abelin-mass-spec"
+    df["measurement_value"] = QUALITATIVE_TO_AFFINITY["Positive"]
+    df["measurement_inequality"] = "<"
+    df["measurement_type"] = "qualitative"
+    df["original_allele"] = df.allele
+    df["allele"] = df.original_allele.map(normalize_allele_name)
+
+    print("Dropping un-parseable alleles: %s" % ", ".join(
+        str(x) for x in df.loc[df.allele == "UNKNOWN"]["allele"].unique()))
+    df = df.loc[df.allele != "UNKNOWN"]
+    print("Abelin mass-spec data now: %s" % str(df.shape))
+
+    print("Removing duplicates")
+    df = df.drop_duplicates(["allele", "peptide"])
+    print("Abelin mass-spec data now: %s" % str(df.shape))
+
+    return df
+
+
+def load_data_iedb(iedb_csv, include_qualitative=True, include_mass_spec=False):
     iedb_df = pandas.read_csv(iedb_csv, skiprows=1, low_memory=False)
     print("Loaded iedb data: %s" % str(iedb_df.shape))
 
@@ -99,24 +171,29 @@ def load_data_iedb(iedb_csv, include_qualitative=True):
 
     quantitative = iedb_df.ix[iedb_df["Units"] == "nM"].copy()
     quantitative["measurement_type"] = "quantitative"
+    quantitative["measurement_inequality"] = "="
     print("Quantitative measurements: %d" % len(quantitative))
 
     qualitative = iedb_df.ix[iedb_df["Units"] != "nM"].copy()
     qualitative["measurement_type"] = "qualitative"
     print("Qualitative measurements: %d" % len(qualitative))
-    non_mass_spec_qualitative = qualitative.ix[
-        (~qualitative["Method/Technique"].str.contains("mass spec"))
-    ].copy()
-    non_mass_spec_qualitative["Quantitative measurement"] = (
-        non_mass_spec_qualitative["Qualitative Measure"].map(
-            QUALITATIVE_TO_AFFINITY))
-    print("Qualitative measurements after dropping MS: %d" % (
-        len(non_mass_spec_qualitative)))
+    if not include_mass_spec:
+        qualitative = qualitative.loc[
+            (~qualitative["Method/Technique"].str.contains("mass spec"))
+        ].copy()
+
+    qualitative["Quantitative measurement"] = (
+        qualitative["Qualitative Measure"].map(QUALITATIVE_TO_AFFINITY))
+    qualitative["measurement_inequality"] = (
+        qualitative["Qualitative Measure"].map(QUALITATIVE_TO_INEQUALITY))
+
+    print("Qualitative measurements (possibly after dropping MS): %d" % (
+        len(qualitative)))
 
     iedb_df = pandas.concat(
         (
             ([quantitative]) +
-            ([non_mass_spec_qualitative] if include_qualitative else [])),
+            ([qualitative] if include_qualitative else [])),
         ignore_index=True)
 
     print("IEDB measurements per allele:\n%s" % iedb_df.allele.value_counts())
@@ -145,6 +222,7 @@ def load_data_iedb(iedb_csv, include_qualitative=True):
         "Quantitative measurement"
     ].values
     train_data["measurement_source"] = iedb_df.category.values
+    train_data["measurement_inequality"] = iedb_df.measurement_inequality.values
 
     train_data["allele"] = iedb_df["allele"].values
     train_data["original_allele"] = iedb_df["Allele Name"].values
@@ -159,7 +237,7 @@ def run():
 
     dfs = []
     for filename in args.data_iedb:
-        df = load_data_iedb(filename)
+        df = load_data_iedb(filename, include_mass_spec=args.include_iedb_mass_spec)
         dfs.append(df)
     for filename in args.data_kim2014:
         df = load_data_kim2014(filename)
@@ -175,18 +253,31 @@ def run():
             ]
             print("Kim2014 data now: %s" % str(df.shape))
         dfs.append(df)
+    for filename in args.data_systemhc_atlas:
+        df = load_data_systemhc_atlas(filename)
+        dfs.append(df)
+    for filename in args.data_abelin_mass_spec:
+        df = load_data_abelin_mass_spec(filename)
+        dfs.append(df)
 
     df = pandas.concat(dfs, ignore_index=True)
+    print("Combined df: %s" % (str(df.shape)))
+
+    print("Removing combined duplicates")
+    df = df.drop_duplicates(["allele", "peptide", "measurement_value"])
+    print("New combined df: %s" % (str(df.shape)))
+
     df = df[[
         "allele",
         "peptide",
         "measurement_value",
+        "measurement_inequality",
         "measurement_type",
         "measurement_source",
         "original_allele",
     ]].sort_values(["allele", "peptide"]).dropna()
 
-    print("Combined df: %s" % (str(df.shape)))
+    print("Final combined df: %s" % (str(df.shape)))
 
     df.to_csv(args.out_csv, index=False)
     print("Wrote: %s" % args.out_csv)
diff --git a/downloads-generation/data_kim2014/README.md b/downloads-generation/data_kim2014/README.md
deleted file mode 100644
index bf42e01ccade69b63172d342c92a41d9e0497dcb..0000000000000000000000000000000000000000
--- a/downloads-generation/data_kim2014/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Kim 2014 Data
-
-This download contains the BD2009, BD2013, and BLIND datasets from [Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241). BD2013 (augmented with more recent data from IEDB) are used to train the production MHCflurry models. BD2009 and BLIND are useful for performing validation on held-out data.
-
-These files are available on dropbox here:
-
- * https://dl.dropboxusercontent.com/u/3967524/bdata.2009.mhci.public.1.txt
- * https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
- * https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
-
-To generate this download run:
-
-```
-./GENERATE.sh
-```
\ No newline at end of file
diff --git a/downloads-generation/data_published/GENERATE.sh b/downloads-generation/data_published/GENERATE.sh
new file mode 100755
index 0000000000000000000000000000000000000000..916a901e160340f03f2039a640c44bf2460bff55
--- /dev/null
+++ b/downloads-generation/data_published/GENERATE.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Download some published MHC I ligand data
+#
+#
+set -e
+set -x
+
+DOWNLOAD_NAME=data_published
+SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
+SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+
+mkdir -p "$SCRATCH_DIR"
+rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+# Send stdout and stderr to a logfile included with the archive.
+exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+
+# Log some environment info
+date
+pip freeze
+# git rev-parse HEAD
+git status
+
+cd $SCRATCH_DIR/$DOWNLOAD_NAME
+
+# Download kim2014 data
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.2009.mhci.public.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.20130222.mhci.public.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.2013.mhci.public.blind.1.txt
+
+# Download abelin et al 2017 data
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/abelin2017.hits.csv.bz2
+
+cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
+tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
+
+echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
diff --git a/downloads-generation/data_published/README.md b/downloads-generation/data_published/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..807adbef7bc6a293c4bd71f5fe1af13f33c47e5d
--- /dev/null
+++ b/downloads-generation/data_published/README.md
@@ -0,0 +1,24 @@
+# Published datasets
+
+These datasets are derived from publications and do not change.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
+
+## Kim 2014
+
+This download contains the BD2009, BD2013, and BLIND datasets from
+[Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241).
+
+BD2013 (augmented with more recent data from IEDB) is used to train the production
+MHCflurry models. BD2009 and BLIND are useful for performing validation on held-out data.
+
+
+## Abelin et al. Immunity 2017
+
+This download contains the peptides identified in
+[Mass Spectrometry Profiling of HLA-Associated Peptidomes in Mono-allelic Cells Enables More Accurate Epitope Prediction](https://www.ncbi.nlm.nih.gov/pubmed/28228285).
+
diff --git a/downloads-generation/data_kim2014/GENERATE.sh b/downloads-generation/data_systemhcatlas/GENERATE.sh
similarity index 66%
rename from downloads-generation/data_kim2014/GENERATE.sh
rename to downloads-generation/data_systemhcatlas/GENERATE.sh
index dbda0fe8c12df076e5e8f3b96728eefda51f23c6..1558409c2b981591681ae726a641e51283935ad9 100755
--- a/downloads-generation/data_kim2014/GENERATE.sh
+++ b/downloads-generation/data_systemhcatlas/GENERATE.sh
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Download some published MHC I ligand data from a location on Dropbox.
+# Download some published MHC I ligands identified by mass-spec
 #
 #
 set -e
 set -x
 
-DOWNLOAD_NAME=data_kim2014
+DOWNLOAD_NAME=data_systemhcatlas
 SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
 SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
 
@@ -26,9 +26,9 @@ git status
 
 cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.2009.mhci.public.1.txt
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/systemhc.20171121.combined.csv.bz2
+
+mv systemhc.20171121.combined.csv.bz2 data.csv.bz2
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
diff --git a/downloads-generation/data_systemhcatlas/README.md b/downloads-generation/data_systemhcatlas/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e66d9af8c400d8cceeaa4762bbb43bbea0493f8
--- /dev/null
+++ b/downloads-generation/data_systemhcatlas/README.md
@@ -0,0 +1,10 @@
+# SysteMHC database dump
+
+This is a data dump of the [SysteMHC Atlas](https://systemhcatlas.org/) provided
+by personal communication. It is distributed under the ODC Open Database License.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
\ No newline at end of file
diff --git a/downloads-generation/models_class1/GENERATE.sh b/downloads-generation/models_class1/GENERATE.sh
index b72334b536cca453300b52257eb8e9c65c7e0dd3..275e5c1b2efa49142bea2c690c718789890f7c7d 100755
--- a/downloads-generation/models_class1/GENERATE.sh
+++ b/downloads-generation/models_class1/GENERATE.sh
@@ -29,14 +29,15 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
 mkdir models
 
-cp $SCRIPT_DIR/hyperparameters.yaml .
+python $SCRIPT_DIR/generate_hyperparameters.py > hyperparameters.yaml
 
 time mhcflurry-class1-train-allele-specific-models \
-    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" \
+    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models \
-    --percent-rank-calibration-num-peptides-per-length 1000000 \
-    --min-measurements-per-allele 75
+    --percent-rank-calibration-num-peptides-per-length 100000 \
+    --min-measurements-per-allele 75 \
+    --num-jobs 32 16
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
diff --git a/downloads-generation/models_class1/generate_hyperparameters.py b/downloads-generation/models_class1/generate_hyperparameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..dcd48aa8cd3a4f8f9628f0260012de1adc656a5a
--- /dev/null
+++ b/downloads-generation/models_class1/generate_hyperparameters.py
@@ -0,0 +1,79 @@
+"""
+Generate grid of hyperparameters
+"""
+
+from sys import stdout
+from copy import deepcopy
+from yaml import dump
+
+base_hyperparameters = {
+    ##########################################
+    # ENSEMBLE SIZE
+    ##########################################
+    "n_models": 1,
+
+    ##########################################
+    # OPTIMIZATION
+    ##########################################
+    "max_epochs": 500,
+    "patience": 20,
+    "early_stopping": True,
+    "validation_split": 0.1,
+    "minibatch_size": 128,
+    "loss": "custom:mse_with_inequalities",
+
+    ##########################################
+    # RANDOM NEGATIVE PEPTIDES
+    ##########################################
+    "random_negative_rate": 0.2,
+    "random_negative_constant": 25,
+    "random_negative_affinity_min": 20000.0,
+    "random_negative_affinity_max": 50000.0,
+
+    ##########################################
+    # PEPTIDE REPRESENTATION
+    ##########################################
+    # One of "one-hot", "embedding", or "BLOSUM62".
+    "peptide_amino_acid_encoding": "BLOSUM62",
+    "use_embedding": False,  # maintained for backward compatability
+    "embedding_output_dim": 8,  # only used if using embedding
+    "kmer_size": 15,
+
+    ##########################################
+    # NEURAL NETWORK ARCHITECTURE
+    ##########################################
+    "locally_connected_layers": [
+        {
+            "filters": 8,
+            "activation": "tanh",
+            "kernel_size": 3
+        }
+    ],
+    "activation": "tanh",
+    "output_activation": "sigmoid",
+    "layer_sizes": [16],
+    "dense_layer_l1_regularization": 0.001,
+    "batch_normalization": False,
+    "dropout_probability": 0.0,
+}
+
+grid = []
+for dense_layer_size in [64, 32, 16]:
+    for l1 in [0.001, 0.01, 0.0]:
+        for num_lc in [0, 1, 2]:
+            for lc_kernel_size in [3, 5]:
+                new = deepcopy(base_hyperparameters)
+                new["layer_sizes"] = [dense_layer_size]
+                new["dense_layer_l1_regularization"] = l1
+                (lc_layer,) = new["locally_connected_layers"]
+                lc_layer['kernel_size'] = lc_kernel_size
+                if num_lc == 0:
+                    new["locally_connected_layers"] = []
+                elif num_lc == 1:
+                    new["locally_connected_layers"] = [lc_layer]
+                elif num_lc == 2:
+                    new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
+                if not grid or new not in grid:
+                    grid.append(new)
+
+dump(grid, stdout)
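
As a sanity check on the grid above: when `num_lc == 0` the two kernel sizes collapse to the same configuration, so the dedup at the end should leave 3 × 3 × (1 + 2 × 2) = 45 entries. One way to verify, assuming the script's output was redirected to hyperparameters.yaml as in the GENERATE.sh scripts:

```python
import yaml

with open("hyperparameters.yaml") as f:
    grid = yaml.safe_load(f)
print(len(grid))  # expect 45
```
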
diff --git a/downloads-generation/models_class1/hyperparameters.test.json b/downloads-generation/models_class1/hyperparameters.test.json
deleted file mode 100644
index 609fa583539ba82bae8179756204dfaa91e156df..0000000000000000000000000000000000000000
--- a/downloads-generation/models_class1/hyperparameters.test.json
+++ /dev/null
@@ -1,37 +0,0 @@
-[
-    {
-        "n_models": 8,
-        "max_epochs": 2,
-        "patience": 10,
-        "early_stopping": true,
-        "validation_split": 0.2,
-
-        "random_negative_rate": 0.0,
-        "random_negative_constant": 25,
-
-        "use_embedding": false,
-        "kmer_size": 15,
-        "batch_normalization": false,
-        "locally_connected_layers": [
-            {
-                "filters": 8,
-                "activation": "tanh",
-                "kernel_size": 3
-            },
-            {
-                "filters": 8,
-                "activation": "tanh",
-                "kernel_size": 3
-            }
-        ],
-        "activation": "relu",
-        "output_activation": "sigmoid",
-        "layer_sizes": [
-            32
-        ],
-        "random_negative_affinity_min": 20000.0,
-        "random_negative_affinity_max": 50000.0,
-        "dense_layer_l1_regularization": 0.001,
-        "dropout_probability": 0.0
-    }
-]
diff --git a/downloads-generation/models_class1/hyperparameters.yaml b/downloads-generation/models_class1/hyperparameters.yaml
deleted file mode 100644
index 9d38ad1af3f7c39b576e60e218fd9fb85eb4cd5f..0000000000000000000000000000000000000000
--- a/downloads-generation/models_class1/hyperparameters.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-[{
-##########################################
-# ENSEMBLE SIZE
-##########################################
-"n_models": 8,
-
-##########################################
-# OPTIMIZATION
-##########################################
-"max_epochs": 500,
-"patience": 20,
-"early_stopping": true,
-"validation_split": 0.2,
-"minibatch_size": 128,
-
-##########################################
-# RANDOM NEGATIVE PEPTIDES
-##########################################
-"random_negative_rate": 0.0,
-"random_negative_constant": 25,
-"random_negative_affinity_min": 20000.0,
-"random_negative_affinity_max": 50000.0,
-
-##########################################
-# PEPTIDE REPRESENTATION
-##########################################
-# One of "one-hot", "embedding", or "BLOSUM62".
-"peptide_amino_acid_encoding": "BLOSUM62",
-"use_embedding": false,  # maintained for backward compatability
-"embedding_output_dim": 8,  # only used if using embedding
-"kmer_size": 15,
-
-##########################################
-# NEURAL NETWORK ARCHITECTURE
-##########################################
-"locally_connected_layers": [
-    {
-        "filters": 8,
-        "activation": "tanh",
-        "kernel_size": 3
-    }
-],
-"activation": "relu",
-"output_activation": "sigmoid",
-"layer_sizes": [16],
-"dense_layer_l1_regularization": 0.001,
-"batch_normalization": false,
-"dropout_probability": 0.0,
-}]
diff --git a/downloads-generation/models_class1_no_mass_spec/GENERATE.sh b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
new file mode 100755
index 0000000000000000000000000000000000000000..20b749784ac1d2936d16fbe6504173cf9cac7fec
--- /dev/null
+++ b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Train MHCflurry Class I models on curated training data without mass-spec hits.
+# Calls mhcflurry-class1-train-allele-specific-models on curated training data
+# using the hyperparameters in "hyperparameters.yaml".
+#
+set -e
+set -x
+
+DOWNLOAD_NAME=models_class1_no_mass_spec
+SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
+SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
+
+mkdir -p "$SCRATCH_DIR"
+rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+# Send stdout and stderr to a logfile included with the archive.
+exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+
+# Log some environment info
+date
+pip freeze
+git status
+
+cd $SCRATCH_DIR/$DOWNLOAD_NAME
+
+mkdir models
+
+python $SCRIPT_DIR/generate_hyperparameters.py > hyperparameters.yaml
+
+time mhcflurry-class1-train-allele-specific-models \
+    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" \
+    --hyperparameters hyperparameters.yaml \
+    --out-models-dir models \
+    --percent-rank-calibration-num-peptides-per-length 100000 \
+    --min-measurements-per-allele 75 \
+    --num-jobs 32 16
+
+cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
+tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
+
+echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
diff --git a/downloads-generation/models_class1_no_mass_spec/README.md b/downloads-generation/models_class1_no_mass_spec/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce784b40a1531cf4bcdec29cfcb9f8d32fb6039a
--- /dev/null
+++ b/downloads-generation/models_class1_no_mass_spec/README.md
@@ -0,0 +1,9 @@
+# Class I allele-specific models (ensemble)
+
+This download contains MHCflurry Class I models trained without mass-spec data.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
\ No newline at end of file
diff --git a/downloads-generation/models_class1_no_mass_spec/generate_hyperparameters.py b/downloads-generation/models_class1_no_mass_spec/generate_hyperparameters.py
new file mode 120000
index 0000000000000000000000000000000000000000..5f2599b4b2b4c418d1c296b43c55be87bb4d85ce
--- /dev/null
+++ b/downloads-generation/models_class1_no_mass_spec/generate_hyperparameters.py
@@ -0,0 +1 @@
+../models_class1/generate_hyperparameters.py
\ No newline at end of file
diff --git a/lint.sh b/lint.sh
index 32fbffb20ce4f5ea07b8743b0c7416bd7f14bfbe..1679b2381979a9aca4934362f9a3f43c2d597fb3 100755
--- a/lint.sh
+++ b/lint.sh
@@ -7,7 +7,7 @@ set -o errexit
 # - https://bitbucket.org/logilab/pylint/issues/701/false-positives-with-not-an-iterable-and
 # - https://bitbucket.org/logilab/pylint/issues/58
 
-find . -name '*.py' -not -name 'versioneer.py' -not -name _version.py \
+find . -name '*.py' -not -path "./docs/*" \
   | xargs pylint \
   --errors-only \
   --disable=print-statement,unsubscriptable-object,not-an-iterable,no-member
diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index 600882fd9693f56f8cc89f45c91f8906ee899d3d..8538c4d3a9afb4f3b3b07b4de7c38c85f765be0a 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -1,7 +1,6 @@
-from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
-from mhcflurry.class1_neural_network import Class1NeuralNetwork
-
-__version__ = "1.0.0"
+from .class1_affinity_predictor import Class1AffinityPredictor
+from .class1_neural_network import Class1NeuralNetwork
+from .version import __version__
 
 __all__ = [
     "__version__",
diff --git a/mhcflurry/allele_encoding.py b/mhcflurry/allele_encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fb7b1319f226b90a8c32bef513f049585bec46b
--- /dev/null
+++ b/mhcflurry/allele_encoding.py
@@ -0,0 +1,70 @@
+import pandas
+
+from . import amino_acid
+
+class AlleleEncoding(object):
+    def __init__(
+            self,
+            alleles,
+            allele_to_fixed_length_sequence=None):
+        """
+        A place to cache encodings for a (potentially large) sequence of alleles.
+
+        Parameters
+        ----------
+        alleles : list of string
+            Allele names
+
+        allele_to_fixed_length_sequence : dict of str -> str
+            Allele name to fixed-length sequence ("pseudosequence")
+        """
+
+        alleles = pandas.Series(alleles)
+
+        all_alleles = list(sorted(alleles.unique()))
+
+        self.allele_to_index = dict(
+            (allele, i)
+            for (i, allele) in enumerate(all_alleles))
+
+        self.indices = alleles.map(self.allele_to_index)
+
+        self.fixed_length_sequences = pandas.Series(
+            [allele_to_fixed_length_sequence[a] for a in all_alleles],
+            index=all_alleles)
+
+        self.encoding_cache = {}
+
+    def fixed_length_vector_encoded_sequences(self, vector_encoding_name):
+        """
+        Encode alleles.
+
+        Parameters
+        ----------
+        vector_encoding_name : string
+            How to represent amino acids.
+            One of "BLOSUM62", "one-hot", etc. Full list of supported vector
+            encodings is given by available_vector_encodings() in amino_acid.
+
+        Returns
+        -------
+        numpy.array with shape (num sequences, sequence length, m) where m is
+        vector_encoding_length(vector_encoding_name)
+        """
+        cache_key = (
+            "fixed_length_vector_encoding",
+            vector_encoding_name)
+        if cache_key not in self.encoding_cache:
+            index_encoded_matrix = amino_acid.index_encoding(
+                self.fixed_length_sequences.values,
+                amino_acid.AMINO_ACID_INDEX)
+            vector_encoded = amino_acid.fixed_vectors_encoding(
+                index_encoded_matrix,
+                amino_acid.ENCODING_DATA_FRAMES[vector_encoding_name])
+            result = vector_encoded[self.indices]
+            self.encoding_cache[cache_key] = result
+        return self.encoding_cache[cache_key]
+
+
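
A small usage sketch for the new class. The pseudosequences below are toy stand-ins, not real allele sequences, and the exact vector length depends on the encoding:

```python
from mhcflurry.allele_encoding import AlleleEncoding

allele_to_sequence = {  # toy fixed-length "pseudosequences", all the same length
    "HLA-A*02:01": "AAYFF",
    "HLA-B*57:01": "AWYTF",
}
encoding = AlleleEncoding(
    ["HLA-A*02:01", "HLA-B*57:01", "HLA-A*02:01"],
    allele_to_fixed_length_sequence=allele_to_sequence)

# One row per input allele; repeated alleles reuse the cached per-allele rows.
matrix = encoding.fixed_length_vector_encoded_sequences("BLOSUM62")
print(matrix.shape)  # (3, 5, m) where m is the BLOSUM62 vector length
```
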
diff --git a/mhcflurry/amino_acid.py b/mhcflurry/amino_acid.py
index 1077c64443c505ecb6961df50250e8ea89e586ee..6095c58fb1bc02d017bbd0fee1b1a62d074770f6 100644
--- a/mhcflurry/amino_acid.py
+++ b/mhcflurry/amino_acid.py
@@ -153,7 +153,7 @@ def fixed_vectors_encoding(index_encoded_sequences, letter_to_vector_df):
     target_shape = (
         num_sequences, sequence_length, letter_to_vector_df.shape[0])
     result = letter_to_vector_df.iloc[
-        index_encoded_sequences.flat
+        index_encoded_sequences.flatten()
     ].values.reshape(target_shape)
     return result
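
The one-line change above swaps numpy's lazy `flat` iterator for `flatten()`, which returns a concrete 1-D array that `DataFrame.iloc` indexes directly:

```python
import numpy

m = numpy.arange(6).reshape(2, 3)
print(type(m.flat))  # <class 'numpy.flatiter'> -- a lazy iterator
print(m.flatten())   # [0 1 2 3 4 5] -- a plain 1-D copy
```
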
 
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 1c523e35be470a8eecdf5ae218a9a110b29ec075..0b90d8c3b1a565ad1285adb576316150eee1cc47 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -2,11 +2,13 @@ import collections
 import hashlib
 import json
 import logging
-import sys
 import time
 import warnings
-from os.path import join, exists
+from os.path import join, exists, abspath
 from os import mkdir
+from socket import gethostname
+from getpass import getuser
+from functools import partial
 
 import mhcnames
 import numpy
@@ -14,12 +16,15 @@ import pandas
 from numpy.testing import assert_equal
 from six import string_types
 
-from mhcflurry.class1_neural_network import Class1NeuralNetwork
-from mhcflurry.common import random_peptides
-from mhcflurry.downloads import get_path
-from mhcflurry.encodable_sequences import EncodableSequences
-from mhcflurry.percent_rank_transform import PercentRankTransform
-from mhcflurry.regression_target import to_ic50
+from .class1_neural_network import Class1NeuralNetwork
+from .common import random_peptides
+from .downloads import get_default_class1_models_dir
+from .encodable_sequences import EncodableSequences
+from .percent_rank_transform import PercentRankTransform
+from .regression_target import to_ic50
+from .version import __version__
+from .ensemble_centrality import CENTRALITY_MEASURES
+from .allele_encoding import AlleleEncoding
 
 
 class Class1AffinityPredictor(object):
@@ -35,7 +40,7 @@ class Class1AffinityPredictor(object):
             self,
             allele_to_allele_specific_models=None,
             class1_pan_allele_models=None,
-            allele_to_pseudosequence=None,
+            allele_to_fixed_length_sequence=None,
             manifest_df=None,
             allele_to_percent_rank_transform=None):
         """
@@ -47,7 +52,7 @@ class Class1AffinityPredictor(object):
         class1_pan_allele_models : list of `Class1NeuralNetwork`
             Ensemble of pan-allele models.
         
-        allele_to_pseudosequence : dict of string -> string
+        allele_to_fixed_length_sequence : dict of string -> string
             Required only if class1_pan_allele_models is specified.
         
         manifest_df : `pandas.DataFrame`, optional
@@ -66,11 +71,11 @@ class Class1AffinityPredictor(object):
             class1_pan_allele_models = []
 
         if class1_pan_allele_models:
-            assert allele_to_pseudosequence, "Pseudosequences required"
+            assert allele_to_fixed_length_sequence, "Allele sequences required"
 
         self.allele_to_allele_specific_models = allele_to_allele_specific_models
         self.class1_pan_allele_models = class1_pan_allele_models
-        self.allele_to_pseudosequence = allele_to_pseudosequence
+        self.allele_to_fixed_length_sequence = allele_to_fixed_length_sequence
 
         if manifest_df is None:
             rows = []
@@ -136,7 +141,7 @@ class Class1AffinityPredictor(object):
 
         allele_to_allele_specific_models = collections.defaultdict(list)
         class1_pan_allele_models = []
-        allele_to_pseudosequence = predictors[0].allele_to_pseudosequence
+        allele_to_fixed_length_sequence = predictors[0].allele_to_fixed_length_sequence
 
         for predictor in predictors:
             for (allele, networks) in (
@@ -148,7 +153,7 @@ class Class1AffinityPredictor(object):
         return Class1AffinityPredictor(
             allele_to_allele_specific_models=allele_to_allele_specific_models,
             class1_pan_allele_models=class1_pan_allele_models,
-            allele_to_pseudosequence=allele_to_pseudosequence
+            allele_to_fixed_length_sequence=allele_to_fixed_length_sequence
         )
 
     @property
@@ -161,8 +166,8 @@ class Class1AffinityPredictor(object):
         list of string
         """
         result = set(self.allele_to_allele_specific_models)
-        if self.allele_to_pseudosequence:
-            result = result.union(self.allele_to_pseudosequence)
+        if self.allele_to_fixed_length_sequence:
+            result = result.union(self.allele_to_fixed_length_sequence)
         return sorted(result)
 
     @property
@@ -192,8 +197,9 @@ class Class1AffinityPredictor(object):
         The serialization format consists of a file called "manifest.csv" with
         the configurations of each Class1NeuralNetwork, along with per-network
         files giving the model weights. If there are pan-allele predictors in
-        the ensemble, the allele pseudosequences are also stored in the
-        directory.
+        the ensemble, the allele sequences are also stored in the
+        directory. There is also a small file "info.txt" with basic metadata:
+        when the models were trained, by whom, and on what host.
         
         Parameters
         ----------
@@ -234,6 +240,26 @@ class Class1AffinityPredictor(object):
         write_manifest_df.to_csv(manifest_path, index=False)
         logging.info("Wrote: %s" % manifest_path)
 
+        # Write "info.txt"
+        info_path = join(models_dir, "info.txt")
+        rows = [
+            ("trained on", time.asctime()),
+            ("package   ", "mhcflurry %s" % __version__),
+            ("hostname  ", gethostname()),
+            ("user      ", getuser()),
+        ]
+        pandas.DataFrame(rows).to_csv(
+            info_path, sep="\t", header=False, index=False)
+
+        if self.allele_to_fixed_length_sequence is not None:
+            allele_to_sequence_df = pandas.DataFrame(
+                list(self.allele_to_fixed_length_sequence.items()),
+                columns=['allele', 'sequence']
+            )
+            allele_to_sequence_df.to_csv(
+                join(models_dir, "allele_sequences.csv"), index=False)
+            logging.info("Wrote: %s" % join(models_dir, "allele_sequences.csv"))
+
         if self.allele_to_percent_rank_transform:
             percent_ranks_df = None
             for (allele, transform) in self.allele_to_percent_rank_transform.items():
@@ -267,7 +293,7 @@ class Class1AffinityPredictor(object):
         `Class1AffinityPredictor` instance
         """
         if models_dir is None:
-            models_dir = get_path("models_class1", "models")
+            models_dir = get_default_class1_models_dir()
 
         manifest_path = join(models_dir, "manifest.csv")
         manifest_df = pandas.read_csv(manifest_path, nrows=max_models)
@@ -278,9 +304,14 @@ class Class1AffinityPredictor(object):
         for (_, row) in manifest_df.iterrows():
             weights_filename = Class1AffinityPredictor.weights_path(
                 models_dir, row.model_name)
-            weights = Class1AffinityPredictor.load_weights(weights_filename)
             config = json.loads(row.config_json)
-            model = Class1NeuralNetwork.from_config(config, weights=weights)
+
+            # We will lazy-load weights when the network is used.
+            model = Class1NeuralNetwork.from_config(
+                config,
+                weights_loader=partial(
+                    Class1AffinityPredictor.load_weights,
+                    abspath(weights_filename)))
             if row.allele == "pan-class1":
                 class1_pan_allele_models.append(model)
             else:
@@ -289,10 +320,10 @@ class Class1AffinityPredictor(object):
 
         manifest_df["model"] = all_models
 
-        pseudosequences = None
-        if exists(join(models_dir, "pseudosequences.csv")):
-            pseudosequences = pandas.read_csv(
-                join(models_dir, "pseudosequences.csv"),
+        allele_to_fixed_length_sequence = None
+        if exists(join(models_dir, "allele_sequences.csv")):
+            allele_to_fixed_length_sequence = pandas.read_csv(
+                join(models_dir, "allele_sequences.csv"),
                 index_col="allele").to_dict()
 
         allele_to_percent_rank_transform = {}
@@ -304,10 +335,10 @@ class Class1AffinityPredictor(object):
                     PercentRankTransform.from_series(percent_ranks_df[allele]))
 
         logging.info(
-            "Loaded %d class1 pan allele predictors, %d pseudosequences, "
+            "Loaded %d class1 pan allele predictors, %d allele sequences, "
             "%d percent rank distributions, and %d allele specific models: %s" % (
                 len(class1_pan_allele_models),
-                len(pseudosequences) if pseudosequences else 0,
+                len(allele_to_fixed_length_sequence) if allele_to_fixed_length_sequence else 0,
                 len(allele_to_percent_rank_transform),
                 sum(len(v) for v in allele_to_allele_specific_models.values()),
                 ", ".join(
@@ -318,7 +349,7 @@ class Class1AffinityPredictor(object):
         result = Class1AffinityPredictor(
             allele_to_allele_specific_models=allele_to_allele_specific_models,
             class1_pan_allele_models=class1_pan_allele_models,
-            allele_to_pseudosequence=pseudosequences,
+            allele_to_fixed_length_sequence=allele_to_fixed_length_sequence,
             manifest_df=manifest_df,
             allele_to_percent_rank_transform=allele_to_percent_rank_transform,
         )
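
A minimal sketch of the lazy-loading pattern introduced above (class, attribute, and path names here are illustrative, not the `Class1NeuralNetwork` internals): `from_config` stores a zero-argument loader, and weights are read from disk only when first needed:

```python
from functools import partial

def load_weights(path):
    print("reading", path)          # stand-in for real deserialization
    return {"dense/kernel": "..."}

class LazyNetwork:
    def __init__(self, weights_loader):
        self._weights_loader = weights_loader
        self._weights = None

    @property
    def weights(self):
        if self._weights is None:   # first access triggers the load
            self._weights = self._weights_loader()
            self._weights_loader = None
        return self._weights

network = LazyNetwork(partial(load_weights, "/models/weights_HYPOTHETICAL.npz"))
# No file I/O has happened yet; it occurs on first use:
_ = network.weights
```
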
@@ -362,12 +393,13 @@ class Class1AffinityPredictor(object):
     def fit_allele_specific_predictors(
             self,
             n_models,
-            architecture_hyperparameters,
+            architecture_hyperparameters_list,
             allele,
             peptides,
             affinities,
+            inequalities=None,
             models_dir_for_save=None,
-            verbose=1,
+            verbose=0,
             progress_preamble=""):
         """
         Fit one or more allele specific predictors for a single allele using a
@@ -381,7 +413,10 @@ class Class1AffinityPredictor(object):
         n_models : int
             Number of neural networks to fit
         
-        architecture_hyperparameters : dict 
+        architecture_hyperparameters_list : list of dict
+            List of hyperparameters. If more than one set of hyperparameters
+            is specified, model selection is performed over them and the
+            model with the lowest validation loss is selected for each fold.
                
         allele : string
         
@@ -389,6 +424,9 @@ class Class1AffinityPredictor(object):
         
         affinities : list of float
             nM affinities
+
+        inequalities : list of string, each element one of ">", "<", or "="
+            See Class1NeuralNetwork.fit for details.
         
         models_dir_for_save : string, optional
             If specified, the Class1AffinityPredictor is (incrementally) written
@@ -406,35 +444,83 @@ class Class1AffinityPredictor(object):
         """
 
         allele = mhcnames.normalize_allele_name(allele)
-        models = self._fit_predictors(
-            n_models=n_models,
-            architecture_hyperparameters=architecture_hyperparameters,
-            peptides=peptides,
-            affinities=affinities,
-            allele_pseudosequences=None,
-            verbose=verbose,
-            progress_preamble=progress_preamble)
-
         if allele not in self.allele_to_allele_specific_models:
             self.allele_to_allele_specific_models[allele] = []
 
-        models_list = []
-        for (i, model) in enumerate(models):
-            model_name = self.model_name(allele, i)
-            models_list.append(model)  # models is a generator
+        encodable_peptides = EncodableSequences.create(peptides)
+
+        n_architectures = len(architecture_hyperparameters_list)
+        if n_models > 1 or n_architectures > 1:
+            # Adjust progress info to indicate number of models and
+            # architectures.
+            pieces = []
+            if n_models > 1:
+                pieces.append("Model {model_num:2d} / {n_models:2d}")
+            if n_architectures > 1:
+                pieces.append(
+                    "Architecture {architecture_num:2d} / {n_architectures:2d}"
+                    " (best so far: {best_num})")
+            progress_preamble_template = "[ %s ] {user_progress_preamble}" % (
+                ", ".join(pieces))
+        else:
+            # Just use the user-provided progress message.
+            progress_preamble_template = "{user_progress_preamble}"
+
+        models = []
+        for model_num in range(n_models):
+            shuffle_permutation = numpy.random.permutation(len(affinities))
+
+            best_num = None
+            best_loss = None
+            best_model = None
+            for (architecture_num, architecture_hyperparameters) in enumerate(
+                    architecture_hyperparameters_list):
+                model = Class1NeuralNetwork(**architecture_hyperparameters)
+                model.fit(
+                    encodable_peptides,
+                    affinities,
+                    shuffle_permutation=shuffle_permutation,
+                    inequalities=inequalities,
+                    verbose=verbose,
+                    progress_preamble=progress_preamble_template.format(
+                        user_progress_preamble=progress_preamble,
+                        best_num="n/a" if best_num is None else best_num + 1,
+                        model_num=model_num + 1,
+                        n_models=n_models,
+                        architecture_num=architecture_num + 1,
+                        n_architectures=n_architectures))
+
+                if n_architectures > 1:
+                    # We require val_loss (i.e. a validation set) if we have
+                    # multiple architectures.
+                    loss = model.loss_history['val_loss'][-1]
+                else:
+                    loss = None
+                if loss is None or best_loss is None or best_loss > loss:
+                    best_loss = loss
+                    best_num = architecture_num
+                    best_model = model
+                del model
+
+            if n_architectures > 1:
+                print("Selected architecture %d." % (best_num + 1))
+
+            model_name = self.model_name(allele, model_num)
             row = pandas.Series(collections.OrderedDict([
                 ("model_name", model_name),
                 ("allele", allele),
-                ("config_json", json.dumps(model.get_config())),
-                ("model", model),
+                ("config_json", json.dumps(best_model.get_config())),
+                ("model", best_model),
             ])).to_frame().T
             self.manifest_df = pandas.concat(
                 [self.manifest_df, row], ignore_index=True)
-            self.allele_to_allele_specific_models[allele].append(model)
+            self.allele_to_allele_specific_models[allele].append(best_model)
             if models_dir_for_save:
                 self.save(
                     models_dir_for_save, model_names_to_write=[model_name])
-        return models_list
+            models.append(best_model)
+
+        return models
 
     def fit_class1_pan_allele_models(
             self,
@@ -443,6 +529,7 @@ class Class1AffinityPredictor(object):
             alleles,
             peptides,
             affinities,
+            inequalities,
             models_dir_for_save=None,
             verbose=1,
             progress_preamble=""):
@@ -461,12 +548,15 @@ class Class1AffinityPredictor(object):
         architecture_hyperparameters : dict
         
         alleles : list of string
-            Allele names (not pseudosequences) corresponding to each peptide 
+            Allele names (not sequences) corresponding to each peptide
         
         peptides : `EncodableSequences` or list of string
         
         affinities : list of float
             nM affinities
+
+        inequalities : list of string, each element one of ">", "<", or "="
+            See Class1NeuralNetwork.fit for details.
         
         models_dir_for_save : string, optional
             If specified, the Class1AffinityPredictor is (incrementally) written
@@ -484,21 +574,24 @@ class Class1AffinityPredictor(object):
         """
 
         alleles = pandas.Series(alleles).map(mhcnames.normalize_allele_name)
-        allele_pseudosequences = alleles.map(self.allele_to_pseudosequence)
+        allele_encoding = AlleleEncoding(
+            alleles,
+            allele_to_fixed_length_sequence=self.allele_to_fixed_length_sequence)
 
-        models = self._fit_predictors(
-            n_models=n_models,
-            architecture_hyperparameters=architecture_hyperparameters,
-            peptides=peptides,
-            affinities=affinities,
-            allele_pseudosequences=allele_pseudosequences,
-            verbose=verbose,
-            progress_preamble=progress_preamble)
+        encodable_peptides = EncodableSequences.create(peptides)
+        models = []
+        for i in range(n_models):
+            logging.info("Training model %d / %d" % (i + 1, n_models))
+            model = Class1NeuralNetwork(**architecture_hyperparameters)
+            model.fit(
+                encodable_peptides,
+                affinities,
+                inequalities=inequalities,
+                allele_encoding=allele_encoding,
+                verbose=verbose,
+                progress_preamble=progress_preamble)
 
-        models_list = []
-        for (i, model) in enumerate(models):
             model_name = self.model_name("pan-class1", i)
-            models_list.append(model)  # models is a generator
             self.class1_pan_allele_models.append(model)
             row = pandas.Series(collections.OrderedDict([
                 ("model_name", model_name),
@@ -511,113 +604,9 @@ class Class1AffinityPredictor(object):
             if models_dir_for_save:
                 self.save(
                     models_dir_for_save, model_names_to_write=[model_name])
-        return models_list
+            models.append(model)
 
-    def _fit_predictors(
-            self,
-            n_models,
-            architecture_hyperparameters,
-            peptides,
-            affinities,
-            allele_pseudosequences,
-            verbose=1,
-            progress_preamble = ""):
-        """
-        Private helper method
-        
-        Parameters
-        ----------
-        n_models : int
-        architecture_hyperparameters : dict
-        peptides : EncodableSequences or list of string
-        affinities : list of float
-        allele_pseudosequences : EncodableSequences or list of string
-        verbose : int
-        progress_preamble : string
-            Optional string of information to include in each progress update
-
-        Returns
-        -------
-        generator of `Class1NeuralNetwork`
-        """
-        encodable_peptides = EncodableSequences.create(peptides)
-        for i in range(n_models):
-            logging.info("Training model %d / %d" % (i + 1, n_models))
-            model = Class1NeuralNetwork(**architecture_hyperparameters)
-            model.fit(
-                encodable_peptides,
-                affinities,
-                allele_pseudosequences=allele_pseudosequences,
-                verbose=verbose,
-                progress_preamble=progress_preamble)
-            yield model
-
-    def calibrate_percentile_ranks(
-            self,
-            peptides=None,
-            num_peptides_per_length=int(1e5),
-            alleles=None,
-            bins=None,
-            quiet=False):
-        """
-        Compute the cumulative distribution of ic50 values for a set of alleles
-        over a large universe of random peptides, to enable computing quantiles in
-        this distribution later.
-
-        Parameters
-        ----------
-        peptides : sequence of string, optional
-            Peptides to use
-        num_peptides_per_length : int, optional
-            If peptides argument is not specified, then num_peptides_per_length
-            peptides are randomly sampled from a uniform distribution for each
-            supported length
-        alleles : sequence of string, optional
-            Alleles to perform calibration for. If not specified all supported
-            alleles will be calibrated.
-        bins : object
-            Anything that can be passed to numpy.histogram's "bins" argument
-            can be used here, i.e. either an integer or a sequence giving bin
-            edges. This is in ic50 space.
-        quiet : boolean
-            If False (default), status updates will be printed to stdout.
-        """
-        if bins is None:
-            bins = to_ic50(numpy.linspace(1, 0, 1000))
-
-        if alleles is None:
-            alleles = self.supported_alleles
-
-        if peptides is None:
-            peptides = []
-            lengths = range(
-                self.supported_peptide_lengths[0],
-                self.supported_peptide_lengths[1] + 1)
-            for length in lengths:
-                peptides.extend(
-                    random_peptides(num_peptides_per_length, length))
-
-        if quiet:
-            def msg(s):
-                pass
-        else:
-            def msg(s):
-                print(s)
-                sys.stdout.flush()
-
-        encoded_peptides = EncodableSequences.create(peptides)
-        for (i, allele) in enumerate(alleles):
-            msg("Calibrating percentile ranks for allele %03d/%03d: %s" % (
-                i + 1, len(alleles), allele))
-            start = time.time()
-            predictions = self.predict(encoded_peptides, allele=allele)
-            msg("Generated %d predictions in %0.2f sec." % (
-                len(predictions), time.time() - start))
-            transform = PercentRankTransform()
-            transform.fit(predictions, bins=bins)
-            self.allele_to_percent_rank_transform[allele] = transform
-            msg("Done calibrating allele %s in %0.2f sec." % (
-                allele, time.time() - start))
+        return models
 
     def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True):
         """
@@ -665,7 +654,13 @@ class Class1AffinityPredictor(object):
                 sub_df.affinity, allele=allele, throw=throw)
         return df.result.values
 
-    def predict(self, peptides, alleles=None, allele=None, throw=True):
+    def predict(
+            self,
+            peptides,
+            alleles=None,
+            allele=None,
+            throw=True,
+            centrality_measure="robust_mean"):
         """
         Predict nM binding affinities.
         
@@ -686,6 +681,9 @@ class Class1AffinityPredictor(object):
             If True, a ValueError will be raised in the case of unsupported
             alleles or peptide lengths. If False, a warning will be logged and
             the predictions for the unsupported alleles or peptides will be NaN.
+        centrality_measure : string or callable
+            Measure of central tendency to use to combine predictions in the
+            ensemble. Options include: mean, median, robust_mean.
 
         Returns
         -------
@@ -697,6 +695,7 @@ class Class1AffinityPredictor(object):
             allele=allele,
             throw=throw,
             include_percentile_ranks=False,
+            centrality_measure=centrality_measure,
         )
         return df.prediction.values
 
@@ -707,7 +706,8 @@ class Class1AffinityPredictor(object):
             allele=None,
             throw=True,
             include_individual_model_predictions=False,
-            include_percentile_ranks=True):
+            include_percentile_ranks=True,
+            centrality_measure="robust_mean"):
         """
         Predict nM binding affinities. Gives more detailed output than `predict`
         method, including 5-95% prediction intervals.
@@ -736,6 +736,9 @@ class Class1AffinityPredictor(object):
             If True, a "prediction_percentile" column will be included giving the
             percentile ranks. If no percentile rank information is available,
             this will be ignored with a warning.
+        centrality_measure : string or callable
+            Measure of central tendency to use to combine predictions in the
+            ensemble. Options include: mean, median, robust_mean.
 
         Returns
         -------
@@ -794,27 +797,27 @@ class Class1AffinityPredictor(object):
             unsupported_alleles = [
                 allele for allele in
                 df.normalized_allele.unique()
-                if allele not in self.allele_to_pseudosequence
+                if allele not in self.allele_to_fixed_length_sequence
             ]
             if unsupported_alleles:
                 msg = (
-                    "No pseudosequences for allele(s): %s.\n"
+                    "No sequences for allele(s): %s.\n"
                     "Supported alleles: %s" % (
                         " ".join(unsupported_alleles),
-                        " ".join(sorted(self.allele_to_pseudosequence))))
+                        " ".join(sorted(self.allele_to_fixed_length_sequence))))
                 logging.warning(msg)
                 if throw:
                     raise ValueError(msg)
             mask = df.supported_peptide_length
             if mask.sum() > 0:
-                masked_allele_pseudosequences = (
-                    df.ix[mask].normalized_allele.map(
-                        self.allele_to_pseudosequence))
+                masked_allele_encoding = AlleleEncoding(
+                    df.loc[mask].normalized_allele,
+                    allele_to_fixed_length_sequence=self.allele_to_fixed_length_sequence)
                 masked_peptides = peptides.sequences[mask]
                 for (i, model) in enumerate(self.class1_pan_allele_models):
                     df.loc[mask, "model_pan_%d" % i] = model.predict(
                         masked_peptides,
-                        allele_pseudosequences=masked_allele_pseudosequences)
+                        allele_encoding=masked_allele_encoding)
 
         if self.allele_to_allele_specific_models:
             query_alleles = df.normalized_allele.unique()
@@ -852,9 +855,15 @@ class Class1AffinityPredictor(object):
         df_predictions = df[
             [c for c in df.columns if c.startswith("model_")]
         ]
+
+        if callable(centrality_measure):
+            centrality_function = centrality_measure
+        else:
+            centrality_function = CENTRALITY_MEASURES[centrality_measure]
+
         logs = numpy.log(df_predictions)
-        log_means = logs.mean(1)
-        df["prediction"] = numpy.exp(log_means)
+        log_centers = centrality_function(logs.values)
+        df["prediction"] = numpy.exp(log_centers)
         df["prediction_low"] = numpy.exp(logs.quantile(0.05, axis=1))
         df["prediction_high"] = numpy.exp(logs.quantile(0.95, axis=1))
 
@@ -918,3 +927,61 @@ class Class1AffinityPredictor(object):
         ]
         loaded.close()
         return weights
+
+    def calibrate_percentile_ranks(
+            self,
+            peptides=None,
+            num_peptides_per_length=int(1e5),
+            alleles=None,
+            bins=None):
+        """
+        Compute the cumulative distribution of ic50 values for a set of alleles
+        over a large universe of random peptides, to enable computing quantiles in
+        this distribution later.
+
+        Parameters
+        ----------
+        peptides : sequence of string or EncodableSequences, optional
+            Peptides to use
+        num_peptides_per_length : int, optional
+            If peptides argument is not specified, then num_peptides_per_length
+            peptides are randomly sampled from a uniform distribution for each
+            supported length
+        alleles : sequence of string, optional
+            Alleles to perform calibration for. If not specified all supported
+            alleles will be calibrated.
+        bins : object
+            Anything that can be passed to numpy.histogram's "bins" argument
+            can be used here, i.e. either an integer or a sequence giving bin
+            edges. This is in ic50 space.
+
+        Returns
+        ----------
+        EncodableSequences : peptides used for calibration
+        """
+        if bins is None:
+            bins = to_ic50(numpy.linspace(1, 0, 1000))
+
+        if alleles is None:
+            alleles = self.supported_alleles
+
+        if peptides is None:
+            peptides = []
+            lengths = range(
+                self.supported_peptide_lengths[0],
+                self.supported_peptide_lengths[1] + 1)
+            for length in lengths:
+                peptides.extend(
+                    random_peptides(num_peptides_per_length, length))
+
+        encoded_peptides = EncodableSequences.create(peptides)
+
+        for (i, allele) in enumerate(alleles):
+            predictions = self.predict(encoded_peptides, allele=allele)
+            transform = PercentRankTransform()
+            transform.fit(predictions, bins=bins)
+            self.allele_to_percent_rank_transform[allele] = transform
+
+        return encoded_peptides
+
+
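
The dispatch through CENTRALITY_MEASURES above is the only place this diff
touches the ensemble-combination logic; the measures themselves are defined
elsewhere in the package and are not shown in this diff. As a rough sketch of
what a "robust_mean" over the per-peptide matrix of log affinities could look
like (assuming one row per prediction and one column per ensemble member):

```python
import numpy

def robust_mean(log_values):
    # Sketch only: mean of each row restricted to values within that row's
    # 25th-75th percentiles, falling back to a plain nan-mean when there are
    # too few ensemble members for trimming to be meaningful.
    if log_values.shape[1] <= 3:
        return numpy.nanmean(log_values, axis=1)
    without_nans = numpy.nan_to_num(log_values)  # replace NaN with 0.0
    mask = (
        (~numpy.isnan(log_values)) &
        (without_nans <= numpy.nanpercentile(
            log_values, 75, axis=1).reshape((-1, 1))) &
        (without_nans >= numpy.nanpercentile(
            log_values, 25, axis=1).reshape((-1, 1))))
    return (without_nans * mask.astype(float)).sum(1) / mask.sum(1)
```

Since predictions are combined in log space (note the numpy.log / numpy.exp
pair above), trimming the tails this way keeps the ensemble center robust to a
single badly-behaved model.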
diff --git a/mhcflurry/class1_neural_network.py b/mhcflurry/class1_neural_network.py
index eafbbf96398e58eb0028a8bfaadab62835377a67..936e75119f6974fb6b619ad4f57461a9a7e50339 100644
--- a/mhcflurry/class1_neural_network.py
+++ b/mhcflurry/class1_neural_network.py
@@ -5,12 +5,13 @@ import logging
 import numpy
 import pandas
 
-from mhcflurry.hyperparameters import HyperparameterDefaults
+from .hyperparameters import HyperparameterDefaults
 
-from mhcflurry.encodable_sequences import EncodableSequences
-from mhcflurry.amino_acid import available_vector_encodings, vector_encoding_length
-from mhcflurry.regression_target import to_ic50, from_ic50
-from mhcflurry.common import random_peptides, amino_acid_distribution
+from .encodable_sequences import EncodableSequences
+from .amino_acid import available_vector_encodings, vector_encoding_length
+from .regression_target import to_ic50, from_ic50
+from .common import random_peptides, amino_acid_distribution
+from .custom_loss import CUSTOM_LOSSES
 
 
 class Class1NeuralNetwork(object):
@@ -26,11 +27,13 @@ class Class1NeuralNetwork(object):
 
     network_hyperparameter_defaults = HyperparameterDefaults(
         kmer_size=15,
-        use_embedding=False,
         peptide_amino_acid_encoding="one-hot",
         embedding_input_dim=21,
         embedding_output_dim=8,
-        pseudosequence_use_embedding=False,
+        allele_dense_layer_sizes=[],
+        peptide_dense_layer_sizes=[],
+        peptide_allele_merge_method="multiply",
+        peptide_allele_merge_activation="",
         layer_sizes=[32],
         dense_layer_l1_regularization=0.001,
         dense_layer_l2_regularization=0.0,
@@ -106,13 +109,46 @@ class Class1NeuralNetwork(object):
     Combined set of all supported hyperparameters and their default values.
     """
 
+    # Hyperparameter renames.
+    # These are updated from time to time as new versions are developed. They
+    # provide a primitive way to allow new code to work with models trained
+    # using older code.
+    # None indicates the hyperparameter has been dropped.
+    hyperparameter_renames = {
+        "use_embedding": None,
+        "pseudosequence_use_embedding": None,
+    }
+
+    @classmethod
+    def apply_hyperparameter_renames(cls, hyperparameters):
+        """
+        Handle hyperparameter renames.
+
+        Parameters
+        ----------
+        hyperparameters : dict
+
+        Returns
+        -------
+        dict : updated hyperparameters
+
+        """
+        for (from_name, to_name) in cls.hyperparameter_renames.items():
+            if from_name in hyperparameters:
+                value = hyperparameters.pop(from_name)
+                if to_name:
+                    hyperparameters[to_name] = value
+        return hyperparameters
+
     def __init__(self, **hyperparameters):
         self.hyperparameters = self.hyperparameter_defaults.with_defaults(
-            hyperparameters)
+            self.apply_hyperparameter_renames(hyperparameters))
 
         self._network = None
         self.network_json = None
         self.network_weights = None
+        self.network_weights_loader = None
 
         self.loss_history = None
         self.fit_seconds = None
@@ -124,6 +160,13 @@ class Class1NeuralNetwork(object):
     (Keras model, existing network weights)
     """
 
+    @classmethod
+    def clear_model_cache(klass):
+        """
+        Clear the Keras model cache.
+        """
+        klass.KERAS_MODELS_CACHE.clear()
+
     @classmethod
     def borrow_cached_network(klass, network_json, network_weights):
         """
@@ -175,6 +218,7 @@ class Class1NeuralNetwork(object):
         keras.models.Model
         """
         if self._network is None and self.network_json is not None:
+            self.load_weights()
             if borrow:
                 return self.borrow_cached_network(
                     self.network_json,
@@ -205,10 +249,11 @@ class Class1NeuralNetwork(object):
         result = dict(self.__dict__)
         result['_network'] = None
         result['network_weights'] = None
+        result['network_weights_loader'] = None
         return result
 
     @classmethod
-    def from_config(cls, config, weights=None):
+    def from_config(cls, config, weights=None, weights_loader=None):
         """
         deserialize from a dict returned by get_config().
         
@@ -217,6 +262,8 @@ class Class1NeuralNetwork(object):
         config : dict
         weights : list of array, optional
             Network weights to restore
+        weights_loader : callable, optional
+            Function to call (no arguments) to load weights when needed
 
         Returns
         -------
@@ -227,8 +274,20 @@ class Class1NeuralNetwork(object):
         assert all(hasattr(instance, key) for key in config), config.keys()
         instance.__dict__.update(config)
         instance.network_weights = weights
+        instance.network_weights_loader = weights_loader
         return instance
 
+    def load_weights(self):
+        """
+        Load weights by evaluating self.network_weights_loader, if needed.
+
+        After calling this, self.network_weights_loader will be None and
+        self.network_weights will be the weights list, if available.
+        """
+        if self.network_weights_loader:
+            self.network_weights = self.network_weights_loader()
+            self.network_weights_loader = None
+
     def get_weights(self):
         """
         Get the network weights
@@ -239,6 +298,7 @@ class Class1NeuralNetwork(object):
         or None if there is no network
         """
         self.update_network_description()
+        self.load_weights()
         return self.network_weights
 
     def __getstate__(self):
@@ -251,7 +311,7 @@ class Class1NeuralNetwork(object):
 
         """
         self.update_network_description()
-        self.update_network_description()
+        self.load_weights()
         result = dict(self.__dict__)
         result['_network'] = None
         return result
@@ -270,8 +330,7 @@ class Class1NeuralNetwork(object):
         numpy.array
         """
         encoder = EncodableSequences.create(peptides)
-        if (self.hyperparameters['use_embedding'] or
-                self.hyperparameters['peptide_amino_acid_encoding'] == "embedding"):
+        if (self.hyperparameters['peptide_amino_acid_encoding'] == "embedding"):
             encoded = encoder.variable_length_to_fixed_length_categorical(
                 max_length=self.hyperparameters['kmer_size'],
                 **self.input_encoding_hyperparameter_defaults.subselect(
@@ -305,34 +364,29 @@ class Class1NeuralNetwork(object):
             self.hyperparameters['right_edge'],
         self.hyperparameters['kmer_size'])
 
-    def pseudosequence_to_network_input(self, pseudosequences):
+    def allele_encoding_to_network_input(self, allele_encoding):
         """
-        Encode pseudosequences to the fixed-length encoding expected by the neural
+        Encode alleles to the fixed-length encoding expected by the neural
         network (which depends on the architecture).
 
         Parameters
         ----------
-        pseudosequences : EncodableSequences or list of string
+        allele_encoding : AlleleEncoding
 
         Returns
         -------
         numpy.array
         """
-        encoder = EncodableSequences.create(pseudosequences)
-        if self.hyperparameters['pseudosequence_use_embedding']:
-            encoded = encoder.fixed_length_categorical()
-        else:
-            raise NotImplementedError
-            # encoded = encoder.fixed_length_one_hot()
-        assert len(encoded) == len(pseudosequences)
-        return encoded
+        return allele_encoding.fixed_length_vector_encoded_sequences("BLOSUM62")
 
     def fit(
             self,
             peptides,
             affinities,
-            allele_pseudosequences=None,
+            allele_encoding=None,
+            inequalities=None,
             sample_weights=None,
+            shuffle_permutation=None,
             verbose=1,
             progress_preamble=""):
         """
@@ -345,14 +399,26 @@ class Class1NeuralNetwork(object):
         affinities : list of float
             nM affinities. Must be same length of as peptides.
         
-        allele_pseudosequences : EncodableSequences or list of string, optional
+        allele_encoding : AlleleEncoding, optional
             If not specified, the model will be a single-allele predictor.
+
+        inequalities : list of string, each element one of ">", "<", or "=".
+            Inequalities to use for fitting. Same length as affinities.
+            For example, a ">" will train on y_pred > y_true for that element
+            in the training set. Requires a custom loss that supports
+            inequalities (e.g. mse_with_inequalities). If None, all
+            inequalities are taken to be "=".
             
         sample_weights : list of float, optional
             If not specified, all samples (including random negatives added
             during training) will have equal weight. If specified, the random
             negatives will be assigned weight=1.0.
-        
+
+        shuffle_permutation : list of int, optional
+            Permutation (integer list) of the same length as peptides and
+            affinities. If None, a random permutation will be generated.
+
         verbose : int
             Keras verbosity level
 
@@ -391,37 +457,110 @@ class Class1NeuralNetwork(object):
 
         y_values = from_ic50(affinities)
         assert numpy.isnan(y_values).sum() == 0, numpy.isnan(y_values).sum()
+        if inequalities is not None:
+            # Reverse inequalities because from_ic50() flips the direction
+            # (i.e. lower affinity results in higher y values).
+            adjusted_inequalities = pandas.Series(inequalities).map({
+                "=": "=",
+                ">": "<",
+                "<": ">",
+            }).values
+        else:
+            adjusted_inequalities = numpy.tile("=", len(y_values))
+        if len(adjusted_inequalities) != len(y_values):
+            raise ValueError("Inequalities and y_values must have same length")
 
         x_dict_without_random_negatives = {
             'peptide': peptide_encoding,
         }
-        pseudosequence_length = None
-        if allele_pseudosequences is not None:
-            pseudosequences_input = self.pseudosequence_to_network_input(
-                allele_pseudosequences)
-            pseudosequence_length = len(pseudosequences_input[0])
-            x_dict_without_random_negatives['pseudosequence'] = (
-                pseudosequences_input)
+        allele_encoding_dims = None
+        if allele_encoding is not None:
+            allele_encoding_input = self.allele_encoding_to_network_input(
+                allele_encoding)
+            allele_encoding_dims = allele_encoding_input.shape[1:]
+            x_dict_without_random_negatives['allele'] = allele_encoding_input
+
+        # Shuffle y_values and the contents of x_dict_without_random_negatives
+        # This ensures different data is used for the test set for early stopping
+        # when multiple models are trained.
+        if shuffle_permutation is None:
+            shuffle_permutation = numpy.random.permutation(len(y_values))
+        y_values = y_values[shuffle_permutation]
+        peptide_encoding = peptide_encoding[shuffle_permutation]
+        adjusted_inequalities = adjusted_inequalities[shuffle_permutation]
+        for key in x_dict_without_random_negatives:
+            x_dict_without_random_negatives[key] = (
+                x_dict_without_random_negatives[key][shuffle_permutation])
+        if sample_weights is not None:
+            sample_weights = sample_weights[shuffle_permutation]
+
+        if self.hyperparameters['loss'].startswith("custom:"):
+            # Using a custom loss that supports inequalities
+            try:
+                custom_loss = CUSTOM_LOSSES[
+                    self.hyperparameters['loss'].replace("custom:", "")
+                ]
+            except KeyError:
+                raise ValueError(
+                    "No such custom loss function: %s. Supported losses are: %s" % (
+                        self.hyperparameters['loss'],
+                        ", ".join([
+                            "custom:" + loss_name for loss_name in CUSTOM_LOSSES
+                        ])))
+            loss_name_or_function = custom_loss.loss
+            loss_supports_inequalities = custom_loss.supports_inequalities
+            loss_encode_y_function = custom_loss.encode_y
+        else:
+            # Using a regular keras loss. No inequalities supported.
+            loss_name_or_function = self.hyperparameters['loss']
+            loss_supports_inequalities = False
+            loss_encode_y_function = None
+
+        if not loss_supports_inequalities and (
+                any(inequality != "=" for inequality in adjusted_inequalities)):
+            raise ValueError("Loss %s does not support inequalities" % (
+                loss_name_or_function))
 
         if self.network() is None:
             self._network = self.make_network(
-                pseudosequence_length=pseudosequence_length,
+                allele_encoding_dims=allele_encoding_dims,
                 **self.network_hyperparameter_defaults.subselect(
                     self.hyperparameters))
-            self.compile()
-
-        y_dict_with_random_negatives = {
-            "output": numpy.concatenate([
-                from_ic50(
-                    numpy.random.uniform(
-                        self.hyperparameters[
-                            'random_negative_affinity_min'],
-                        self.hyperparameters[
-                            'random_negative_affinity_max'],
-                        int(num_random_negative.sum()))),
-                y_values,
-            ]),
-        }
+            self.network().compile(
+                loss=loss_name_or_function,
+                optimizer=self.hyperparameters['optimizer'])
+
+        if loss_supports_inequalities:
+            # Do not sample negative affinities: just use an inequality.
+            random_negative_ic50 = self.hyperparameters['random_negative_affinity_min']
+            random_negative_target = from_ic50(random_negative_ic50)
+
+            y_dict_with_random_negatives = {
+                "output": numpy.concatenate([
+                    numpy.tile(
+                        random_negative_target, int(num_random_negative.sum())),
+                    y_values,
+                ]),
+            }
+            # Note: we are using "<" here not ">" because the inequalities are
+            # now in target-space (0-1) not affinity-space.
+            adjusted_inequalities_with_random_negatives = (
+                ["<"] * int(num_random_negative.sum()) +
+                list(adjusted_inequalities))
+        else:
+            # Sample the random negative affinities.
+            y_dict_with_random_negatives = {
+                "output": numpy.concatenate([
+                    from_ic50(
+                        numpy.random.uniform(
+                            self.hyperparameters[
+                                'random_negative_affinity_min'],
+                            self.hyperparameters[
+                                'random_negative_affinity_max'],
+                            int(num_random_negative.sum()))),
+                    y_values,
+                ]),
+            }
         if sample_weights is not None:
             sample_weights_with_random_negatives = numpy.concatenate([
                 numpy.ones(int(num_random_negative.sum())),
@@ -429,6 +568,11 @@ class Class1NeuralNetwork(object):
         else:
             sample_weights_with_random_negatives = None
 
+        if loss_encode_y_function is not None:
+            y_dict_with_random_negatives['output'] = loss_encode_y_function(
+                y_dict_with_random_negatives['output'],
+                adjusted_inequalities_with_random_negatives)
+
         val_losses = []
         min_val_loss_iteration = None
         min_val_loss = None
@@ -436,6 +580,7 @@ class Class1NeuralNetwork(object):
         self.loss_history = collections.defaultdict(list)
         start = time.time()
         last_progress_print = None
+        x_dict_with_random_negatives = {}
         for i in range(self.hyperparameters['max_epochs']):
             random_negative_peptides_list = []
             for (length, count) in num_random_negative.iteritems():
@@ -444,21 +589,45 @@ class Class1NeuralNetwork(object):
                         count,
                         length=length,
                         distribution=aa_distribution))
+            random_negative_peptides = EncodableSequences.create(
+                random_negative_peptides_list)
             random_negative_peptides_encoding = (
-                self.peptides_to_network_input(
-                    random_negative_peptides_list))
-
-            x_dict_with_random_negatives = {
-                "peptide": numpy.concatenate([
-                    random_negative_peptides_encoding,
-                    peptide_encoding,
-                ]) if len(random_negative_peptides_encoding) > 0
-                else peptide_encoding
-            }
-            if pseudosequence_length:
-                # TODO: add random pseudosequences for random negative peptides
-                raise NotImplementedError(
-                    "Allele pseudosequences unsupported with random negatives")
+                self.peptides_to_network_input(random_negative_peptides))
+
+            if not x_dict_with_random_negatives:
+                if len(random_negative_peptides) > 0:
+                    x_dict_with_random_negatives["peptide"] = numpy.concatenate([
+                        random_negative_peptides_encoding,
+                        peptide_encoding,
+                    ])
+                    if 'allele' in x_dict_without_random_negatives:
+                        x_dict_with_random_negatives['allele'] = numpy.concatenate([
+                            x_dict_without_random_negatives['allele'][
+                                numpy.random.choice(
+                                    x_dict_without_random_negatives[
+                                        'allele'].shape[0],
+                                    size=len(random_negative_peptides_list))],
+                            x_dict_without_random_negatives['allele']
+                        ])
+                else:
+                    x_dict_with_random_negatives = (
+                        x_dict_without_random_negatives)
+            else:
+                # Update x_dict_with_random_negatives in place.
+                # This is more memory efficient than recreating it as above.
+                if len(random_negative_peptides) > 0:
+                    x_dict_with_random_negatives["peptide"][:len(random_negative_peptides)] = (
+                        random_negative_peptides_encoding
+                    )
+                    if 'allele' in x_dict_with_random_negatives:
+                        x_dict_with_random_negatives['allele'][:len(random_negative_peptides)] = (
+                            x_dict_with_random_negatives['allele'][
+                                len(random_negative_peptides) + numpy.random.choice(
+                                    x_dict_with_random_negatives['allele'].shape[0] -
+                                    len(random_negative_peptides),
+                                    size=len(random_negative_peptides))
+                            ]
+                        )
 
             fit_history = self.network().fit(
                 x_dict_with_random_negatives,
@@ -499,7 +668,7 @@ class Class1NeuralNetwork(object):
                         self.hyperparameters['patience'])
                     if i > threshold:
                         print((progress_preamble + " " +
-                            "Early stopping at epoch %3d / %3d: loss=%g. "
+                            "Stopping at epoch %3d / %3d: loss=%g. "
                             "Min val loss (%s) at epoch %s" % (
                                 i,
                                 self.hyperparameters['max_epochs'],
@@ -509,7 +678,7 @@ class Class1NeuralNetwork(object):
                         break
         self.fit_seconds = time.time() - start
 
-    def predict(self, peptides, allele_pseudosequences=None, batch_size=4096):
+    def predict(self, peptides, allele_encoding=None, batch_size=4096):
         """
         Predict affinities
         
@@ -517,7 +686,7 @@ class Class1NeuralNetwork(object):
         ----------
         peptides : EncodableSequences or list of string
         
-        allele_pseudosequences : EncodableSequences or list of string, optional
+        allele_encoding : AlleleEncoding, optional
             Only required when this model is a pan-allele model
 
         batch_size : int
@@ -530,33 +699,26 @@ class Class1NeuralNetwork(object):
         x_dict = {
             'peptide': self.peptides_to_network_input(peptides)
         }
-        if allele_pseudosequences is not None:
-            pseudosequences_input = self.pseudosequence_to_network_input(
-                allele_pseudosequences)
-            x_dict['pseudosequence'] = pseudosequences_input
+        if allele_encoding is not None:
+            allele_input = self.allele_encoding_to_network_input(allele_encoding)
+            x_dict['allele'] = allele_input
 
         network = self.network(borrow=True)
         raw_predictions = network.predict(x_dict, batch_size=batch_size)
         predictions = numpy.array(raw_predictions, dtype = "float64")[:,0]
         return to_ic50(predictions)
 
-    def compile(self):
-        """
-        Compile the keras model. Used internally.
-        """
-        self.network().compile(
-            **self.compile_hyperparameter_defaults.subselect(
-                self.hyperparameters))
-
     @staticmethod
     def make_network(
-            pseudosequence_length,
+            allele_encoding_dims,
             kmer_size,
             peptide_amino_acid_encoding,
-            use_embedding,
             embedding_input_dim,
             embedding_output_dim,
-            pseudosequence_use_embedding,
+            allele_dense_layer_sizes,
+            peptide_dense_layer_sizes,
+            peptide_allele_merge_method,
+            peptide_allele_merge_activation,
             layer_sizes,
             dense_layer_l1_regularization,
             dense_layer_l2_regularization,
@@ -580,7 +742,7 @@ class Class1NeuralNetwork(object):
         from keras.layers.embeddings import Embedding
         from keras.layers.normalization import BatchNormalization
 
-        if use_embedding or peptide_amino_acid_encoding == "embedding":
+        if peptide_amino_acid_encoding == "embedding":
             peptide_input = Input(
                 shape=(kmer_size,), dtype='int32', name='peptide')
             current_layer = Embedding(
@@ -600,6 +762,12 @@ class Class1NeuralNetwork(object):
 
         inputs = [peptide_input]
 
+        kernel_regularizer = None
+        l1 = dense_layer_l1_regularization
+        l2 = dense_layer_l2_regularization
+        if l1 > 0 or l2 > 0:
+            kernel_regularizer = keras.regularizers.l1_l2(l1, l2)
+
         for (i, locally_connected_params) in enumerate(locally_connected_layers):
             current_layer = keras.layers.LocallyConnected1D(
                 name="lc_%d" % i,
@@ -607,6 +775,13 @@ class Class1NeuralNetwork(object):
 
         current_layer = Flatten(name="flattened_0")(current_layer)
 
+        for (i, layer_size) in enumerate(peptide_dense_layer_sizes):
+            current_layer = Dense(
+                layer_size,
+                name="peptide_dense_%d" % i,
+                kernel_regularizer=kernel_regularizer,
+                activation=activation)(current_layer)
+
         if batch_normalization:
             current_layer = BatchNormalization(name="batch_norm_early")(
                 current_layer)
@@ -615,37 +790,41 @@ class Class1NeuralNetwork(object):
             current_layer = Dropout(dropout_probability, name="dropout_early")(
                 current_layer)
 
-        if pseudosequence_length:
-            if pseudosequence_use_embedding:
-                pseudosequence_input = Input(
-                    shape=(pseudosequence_length,),
-                    dtype='int32',
-                    name='pseudosequence')
-                pseudo_embedding_layer = Embedding(
-                    input_dim=embedding_input_dim,
-                    output_dim=embedding_output_dim,
-                    input_length=pseudosequence_length,
-                    embeddings_initializer=embedding_init_method)(
-                    pseudosequence_input)
+        if allele_encoding_dims:
+            allele_input = Input(
+                shape=allele_encoding_dims,
+                dtype='float32',
+                name='allele')
+            inputs.append(allele_input)
+            allele_embedding_layer = Flatten(name="allele_flat")(allele_input)
+
+            for (i, layer_size) in enumerate(allele_dense_layer_sizes):
+                allele_embedding_layer = Dense(
+                    layer_size,
+                    name="allele_dense_%d" % i,
+                    kernel_regularizer=kernel_regularizer,
+                    activation=activation)(allele_embedding_layer)
+
+            if peptide_allele_merge_method == 'concatenate':
+                current_layer = keras.layers.concatenate([
+                    current_layer, allele_embedding_layer
+                ], name="allele_peptide_merged")
+            elif peptide_allele_merge_method == 'multiply':
+                current_layer = keras.layers.multiply([
+                    current_layer, allele_embedding_layer
+                ], name="allele_peptide_merged")
             else:
-                pseudosequence_input = Input(
-                    shape=(pseudosequence_length, 21),
-                    dtype='float32', name='peptide')
-                pseudo_embedding_layer = pseudosequence_input
-            inputs.append(pseudosequence_input)
-            pseudo_embedding_layer = Flatten(name="flattened_1")(
-                pseudo_embedding_layer)
-
-            current_layer = keras.layers.concatenate([
-                current_layer, pseudo_embedding_layer], name="concatenated_0")
+                raise ValueError(
+                    "Unsupported peptide_allele_encoding_merge_method: %s"
+                    % peptide_allele_merge_method)
+
+            if peptide_allele_merge_activation:
+                current_layer = keras.layers.Activation(
+                    peptide_allele_merge_activation,
+                    name="alelle_peptide_merged_%s" %
+                         peptide_allele_merge_activation)(current_layer)
             
         for (i, layer_size) in enumerate(layer_sizes):
-            kernel_regularizer = None
-            l1 = dense_layer_l1_regularization
-            l2 = dense_layer_l2_regularization
-            if l1 > 0 or l2 > 0:
-                kernel_regularizer = keras.regularizers.l1_l2(l1, l2)
-
             current_layer = Dense(
                 layer_size,
                 activation=activation,
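
The reworked fit method above accepts qualitative measurements directly. A
usage sketch follows; the training data is made up, "loss" and "max_epochs"
are hyperparameters whose defaults appear earlier in this file, and the
"custom:" prefix is the lookup convention used in fit():

```python
from mhcflurry.class1_neural_network import Class1NeuralNetwork

# Select a loss that supports inequalities; with a plain keras loss, fit()
# raises a ValueError if any inequality is not "=".
network = Class1NeuralNetwork(
    loss="custom:mse_with_inequalities",
    max_epochs=50)

# ">" indicates the measured affinity is only known to exceed 50000 nM.
network.fit(
    peptides=["SIINFEKL", "SIINFEKD", "SIINFEKQ"],
    affinities=[120.0, 50000.0, 3500.0],
    inequalities=["=", ">", "="])

print(network.predict(["SIINFEKL"]))
```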
diff --git a/mhcflurry/common.py b/mhcflurry/common.py
index abc2a72c7b7efc1893f2f3630c48b3166169d5ef..942e9feac4064f6076524826bc504211e2d70130 100644
--- a/mhcflurry/common.py
+++ b/mhcflurry/common.py
@@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import
 import collections
 import logging
 import sys
+import os
 
 import numpy
 import pandas
@@ -9,6 +10,34 @@ import pandas
 from . import amino_acid
 
 
+def set_keras_backend(backend):
+    """
+    Configure Keras backend to use GPU or CPU. Only tensorflow is supported.
+
+    Must be called before Keras has been imported.
+
+    backend must be 'tensorflow-cpu' or 'tensorflow-gpu'.
+    """
+    os.environ["KERAS_BACKEND"] = "tensorflow"
+
+    if backend == "tensorflow-cpu":
+        print("Forcing tensorflow/CPU backend.")
+        os.environ["CUDA_VISIBLE_DEVICES"] = ""
+        device_count = {'CPU': 1, 'GPU': 0}
+    elif backend == "tensorflow-gpu":
+        print("Forcing tensorflow/GPU backend.")
+        device_count = {'CPU': 0, 'GPU': 1}
+    else:
+        raise ValueError("Unsupported backend: %s" % backend)
+
+    import tensorflow
+    from keras import backend as K
+    config = tensorflow.ConfigProto(
+        device_count=device_count)
+    session = tensorflow.Session(config=config)
+    K.set_session(session)
+
+
 def configure_logging(verbose=False):
     """
     Configure logging module using defaults.
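
Because Keras binds its backend and session the first time it is imported,
set_keras_backend is only effective if it runs before anything pulls in Keras.
For example:

```python
from mhcflurry.common import set_keras_backend

# Force a CPU-only tensorflow session. Must run before keras is imported,
# directly or indirectly (e.g. by instantiating a network).
set_keras_backend("tensorflow-cpu")
```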
diff --git a/mhcflurry/custom_loss.py b/mhcflurry/custom_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc885e003af6ec5105db900efaf3709df429f88e
--- /dev/null
+++ b/mhcflurry/custom_loss.py
@@ -0,0 +1,96 @@
+"""
+Custom loss functions.
+
+For losses supporting inequalities, each training data point is associated with
+one of (=), (<), or (>). For e.g. (>) inequalities, penalization is applied only
+if the prediction is less than the given value.
+"""
+
+import pandas
+from numpy import isnan, array
+
+CUSTOM_LOSSES = {}
+
+
+class MSEWithInequalities(object):
+    """
+    Supports training a regressor on data that includes inequalities
+    (e.g. x < 100). Mean square error is used as the loss for elements with
+    an (=) inequality. For elements with e.g. a (> 0.5) inequality, then the loss
+    for that element is (y - 0.5)^2 (standard MSE) if y < 500 and 0 otherwise.
+
+    This loss assumes that the normal range for y_true and y_pred is 0 - 1. As a
+    hack, the implementation uses other intervals for y_pred to encode the
+    inequality information.
+
+    y_true is interpreted as follows:
+
+    between 0 - 1
+       Regular MSE loss is used. Penality (y_pred - y_true)**2 is applied if
+       y_pred is greater or less than y_true.
+
+    between 2 - 3:
+       Treated as a "<" inequality. Penality (y_pred - (y_true - 2))**2 is
+       applied only if y_pred is greater than y_true - 2.
+
+    between 4 - 5:
+       Treated as a ">" inequality. Penality (y_pred - (y_true - 4))**2 is
+       applied only if y_pred is less than y_true - 4.
+    """
+    name = "mse_with_inequalities"
+    supports_inequalities = True
+
+    @staticmethod
+    def encode_y(y, inequalities=None):
+        y = array(y, dtype="float32")
+        if isnan(y).any():
+            raise ValueError("y contains NaN")
+        if (y > 1.0).any():
+            raise ValueError("y contains values > 1.0")
+        if (y < 0.0).any():
+            raise ValueError("y contains values < 0.0")
+
+        if inequalities is None:
+            encoded = y
+        else:
+            offsets = pandas.Series(inequalities).map({
+                '=': 0,
+                '>': 2,
+                '<': 4,
+            }).values
+            if isnan(offsets).any():
+                raise ValueError("Invalid inequality. Must be =, <, or >")
+            encoded = y + offsets
+        assert not isnan(encoded).any()
+        return encoded
+
+    @staticmethod
+    def loss(y_true, y_pred):
+        # We always delay import of Keras so that mhcflurry can be imported initially
+        # without tensorflow debug output, etc.
+        from keras import backend as K
+
+        # Handle (=) inequalities
+        diff1 = y_pred - y_true
+        diff1 *= K.cast(y_true >= 0.0, "float32")
+        diff1 *= K.cast(y_true <= 1.0, "float32")
+
+        # Handle (>) inequalities
+        diff2 = y_pred - (y_true - 2.0)
+        diff2 *= K.cast(y_true >= 2.0, "float32")
+        diff2 *= K.cast(y_true <= 3.0, "float32")
+        diff2 *= K.cast(diff2 < 0.0, "float32")
+
+        # Handle (<) inequalities
+        diff3 = y_pred - (y_true - 4.0)
+        diff3 *= K.cast(y_true >= 4.0, "float32")
+        diff3 *= K.cast(diff3 > 0.0, "float32")
+
+        return (
+            K.sum(K.square(diff1), axis=-1) +
+            K.sum(K.square(diff2), axis=-1) +
+            K.sum(K.square(diff3), axis=-1))
+
+# Register custom losses.
+for cls in [MSEWithInequalities]:
+    CUSTOM_LOSSES[cls.name] = cls()
\ No newline at end of file
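
To make the offset scheme concrete, here is what encode_y produces for a small
example (the values follow the mapping documented above; the output shown in a
comment is approximate):

```python
import numpy
from mhcflurry.custom_loss import CUSTOM_LOSSES

loss_obj = CUSTOM_LOSSES["mse_with_inequalities"]
y = numpy.array([0.9, 0.2, 0.5])
encoded = loss_obj.encode_y(y, inequalities=["=", ">", "<"])
print(encoded)  # approx. [0.9, 2.2, 4.5]: offsets 0, 2, 4 mark "=", ">", "<"
```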
diff --git a/mhcflurry/downloads.py b/mhcflurry/downloads.py
index e88675ed00b78718f0687e28ed4911473593e13c..b3b7e9d3b78d421af685841f370b5a99885f595c 100644
--- a/mhcflurry/downloads.py
+++ b/mhcflurry/downloads.py
@@ -9,7 +9,7 @@ from __future__ import (
 )
 import logging
 import yaml
-from os.path import join, exists
+from os.path import join, exists, relpath
 from pipes import quote
 from os import environ
 from collections import OrderedDict
@@ -20,11 +20,14 @@ ENVIRONMENT_VARIABLES = [
     "MHCFLURRY_DATA_DIR",
     "MHCFLURRY_DOWNLOADS_CURRENT_RELEASE",
     "MHCFLURRY_DOWNLOADS_DIR",
+    "MHCFLURRY_DEFAULT_CLASS1_MODELS"
 ]
 
 _DOWNLOADS_DIR = None
 _CURRENT_RELEASE = None
 _METADATA = None
+_MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR = environ.get(
+    "MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR")
 
 
 def get_downloads_dir():
@@ -51,6 +54,37 @@ def get_downloads_metadata():
     return _METADATA
 
 
+def get_default_class1_models_dir(test_exists=True):
+    """
+    Return the absolute path to the default class1 models dir.
+
+    If the environment variable MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR is set to
+    an absolute path, return that path. If it's set to a relative path (i.e.
+    one that does not start with /), return that path interpreted relative to
+    the mhcflurry downloads dir.
+
+    If the environment variable is NOT set, return the path to the downloaded
+    models in the "models_class1" download.
+
+    Parameters
+    ----------
+
+    test_exists : boolean, optional
+        Whether to raise an exception if the path does not exist
+
+    Returns
+    -------
+    string : absolute path
+    """
+    if _MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR:
+        result = join(get_downloads_dir(), _MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR)
+        if test_exists and not exists(result):
+            raise IOError("No such directory: %s" % result)
+        return result
+    else:
+        return get_path("models_class1", "models", test_exists=test_exists)
+
+
 def get_current_release_downloads():
     """
     Return a dict of all available downloads in the current release.
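
Note that _MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR is read once at module import,
so the environment variable must be set before mhcflurry.downloads is
imported. A sketch (the relative path below is hypothetical):

```python
import os

# Hypothetical override; a path without a leading "/" is resolved relative
# to the mhcflurry downloads dir.
os.environ["MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR"] = (
    "models_class1_no_mass_spec/models")

from mhcflurry.downloads import get_default_class1_models_dir
print(get_default_class1_models_dir(test_exists=False))
```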
diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml
index cf7709b9edf7fb46c1f2ae5e7b23ebb681099380..010aaa17b696fa5dfa026762e5b45b79f11198e5 100644
--- a/mhcflurry/downloads.yml
+++ b/mhcflurry/downloads.yml
@@ -8,7 +8,7 @@
 # by name, the downloads with "default=true" are downloaded.
 
 # This should usually be the latest release.
-current-release: 1.0.0
+current-release: 1.1.0
 
 # An integer indicating what models the current MHCflurry code base is compatible
 # with. Increment this integer when changes are made to MHCflurry that would break
@@ -17,54 +17,89 @@ current-compatibility-version: 2
 
 # Add new releases here as they are made.
 releases:
+    1.1.0:
+        compatibility-version: 2
+        downloads:
+            - name: models_class1
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/models_class1.20180205.tar.bz2
+              default: true
+
+            - name: models_class1_no_mass_spec
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/models_class1_no_mass_spec.20180205.tar.bz2
+              default: true
+
+            - name: models_class1_experiments1
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/models_class1_experiments1.tar.bz2
+              default: false
+
+            - name: cross_validation_class1
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/cross_validation_class1.tar.bz2
+              default: false
+
+            - name: data_iedb
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.0/data_iedb.tar.bz2
+              default: false
+
+            - name: data_published
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/data_published.tar.bz2
+              default: false
+
+            - name: data_systemhcatlas
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.1/data_systemhcatlas.tar.bz2
+              default: false
+
+            - name: data_curated
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.1/data_curated.tar.bz2
+              default: true
+
     1.0.0:
         compatibility-version: 2
         downloads:
             - name: models_class1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/models_class1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.0/models_class1.tar.bz2
               default: true
 
             - name: models_class1_experiments1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/models_class1_experiments1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.0/models_class1_experiments1.tar.bz2
               default: false
 
             - name: cross_validation_class1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/cross_validation_class1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/pre-1.0/cross_validation_class1.tar.bz2
               default: false
 
             - name: data_iedb
-              url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/data_iedb.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.0/data_iedb.tar.bz2
               default: false
 
             - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
               default: false
 
             - name: data_curated
-              url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/data_curated.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.0/data_curated.tar.bz2
               default: true
 
     0.9.2:
         compatibility-version: 2
         downloads:
             - name: models_class1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.2/models_class1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.2/models_class1.tar.bz2
               default: true
 
             - name: data_curated
-              url: https://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_curated.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/0.9.1/data_curated.tar.bz2
               default: true
 
             - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
               default: false
 
             - name: data_iedb
-              url: https://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_iedb.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/0.9.1/data_iedb.tar.bz2
               default: false
 
             - name: models_class1_experiments1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.2/models_class1_experiments1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.2/models_class1_experiments1.tar.bz2
               default: false
 
 
@@ -72,55 +107,55 @@ releases:
         compatibility-version: 2
         downloads:
             - name: models_class1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/models_class1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.1/models_class1.tar.bz2
               default: true
 
             - name: data_curated
-              url: https://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_curated.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/0.9.1/data_curated.tar.bz2
               default: true
 
             - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
               default: false
 
             - name: data_iedb
-              url: https://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_iedb.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/0.9.1/data_iedb.tar.bz2
               default: false
 
             - name: models_class1_experiments1
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/models_class1_experiments1.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.9.1/models_class1_experiments1.tar.bz2
               default: false
 
     0.2.0:
         compatibility-version: 1
         downloads:
             - name: models_class1_allele_specific_ensemble
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.2.0/models_class1_allele_specific_ensemble.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.2.0/models_class1_allele_specific_ensemble.tar.bz2
               default: true
 
             - name: models_class1_allele_specific_single
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.2.0/models_class1_allele_specific_single.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.2.0/models_class1_allele_specific_single.tar.bz2
               default: false
 
             - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2
               default: true
 
             - name: data_combined_iedb_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_combined_iedb_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.0.8/data_combined_iedb_kim2014.tar.bz2
               default: true
 
     0.0.8:
         compatibility-version: 1
         downloads:
             - name: models_class1_allele_specific_single
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/models_class1_allele_specific_single.no_impute.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.0.8/models_class1_allele_specific_single.no_impute.tar.bz2
               default: true
 
             - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.0.8/data_kim2014.tar.bz2
               default: true
 
             - name: data_combined_iedb_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.0.8/data_combined_iedb_kim2014.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/0.0.8/data_combined_iedb_kim2014.tar.bz2
               default: true
diff --git a/mhcflurry/downloads_command.py b/mhcflurry/downloads_command.py
index 08648a99e8eb6d2c03967cfd3ebb0bd12bbdaab8..8dfc5fb667953459e51fcc0ba91659d4c378568f 100644
--- a/mhcflurry/downloads_command.py
+++ b/mhcflurry/downloads_command.py
@@ -28,6 +28,7 @@ from pipes import quote
 import errno
 import tarfile
 from tempfile import mkstemp
+from tqdm import tqdm
 try:
     from urllib.request import urlretrieve
 except ImportError:
@@ -120,6 +121,23 @@ def yes_no(boolean):
     return "YES" if boolean else "NO"
 
 
+# Progress bar for downloads. See https://pypi.python.org/pypi/tqdm
+class TqdmUpTo(tqdm):
+    """Provides `update_to(n)` which uses `tqdm.update(delta_n)`."""
+    def update_to(self, b=1, bsize=1, tsize=None):
+        """
+        b  : int, optional
+            Number of blocks transferred so far [default: 1].
+        bsize  : int, optional
+            Size of each block (in tqdm units) [default: 1].
+        tsize  : int, optional
+            Total size (in tqdm units). If None [default], the total is left unchanged.
+        """
+        if tsize is not None:
+            self.total = tsize
+        self.update(b * bsize - self.n)  # will also set self.n = b * bsize
+
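+# An illustrative sketch of the reporthook contract this relies on
+# (url/filename are hypothetical): urlretrieve calls
+# reporthook(block_count, block_size, total_size) after each chunk, so
+#
+#     with TqdmUpTo(unit='B', unit_scale=True, miniters=1) as t:
+#         urlretrieve(url, filename, reporthook=t.update_to)
+#
+# renders a live progress bar as the download proceeds.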
+
 def fetch_subcommand(args):
     def qprint(msg):
         if not args.quiet:
@@ -167,16 +185,18 @@ def fetch_subcommand(args):
             yes_no(item in items_to_fetch),
             info['metadata']["url"]))
 
-    # TODO: Add a download progress bar?
-    # See http://stackoverflow.com/questions/51212/how-to-write-a-download-progress-indicator-in-python
-    # Also may want to extract into somewhere temporary and then rename to
-    # making an incomplete extract if the process is killed.
+    # TODO: may want to extract into somewhere temporary and then rename to
+    # avoid making an incomplete extract if the process is killed.
     for item in items_to_fetch:
         metadata = downloads[item]['metadata']
         (temp_fd, target_path) = mkstemp()
         try:
             qprint("Downloading: %s" % metadata['url'])
-            urlretrieve(metadata['url'], target_path)
+            urlretrieve(
+                metadata['url'],
+                target_path,
+                reporthook=TqdmUpTo(
+                    unit='B', unit_scale=True, miniters=1).update_to)
             qprint("Downloaded to: %s" % quote(target_path))
 
             tar = tarfile.open(target_path, 'r:bz2')
@@ -191,7 +211,9 @@ def fetch_subcommand(args):
                     "Archive has suspicious names: %s" % bad_names)
             result_dir = get_path(item, test_exists=False)
             os.mkdir(result_dir)
-            tar.extractall(path=result_dir)
+
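+            # Extract one member per call so the progress bar can advance.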
+            for member in tqdm(tar.getmembers(), desc='Extracting'):
+                tar.extract(member, path=result_dir)
             tar.close()
             qprint("Extracted %d files to: %s" % (
                 len(names), quote(result_dir)))
@@ -229,15 +251,13 @@ def info_subcommand(args):
 
     downloads = get_current_release_downloads()
 
-    format_string = "%-40s  %-12s   %-12s  %-20s "
-    print(format_string % (
-        "DOWNLOAD NAME", "DOWNLOADED?", "DEFAULT?", "URL"))
+    format_string = "%-40s  %-12s  %-20s "
+    print(format_string % ("DOWNLOAD NAME", "DOWNLOADED?", "URL"))
 
     for (item, info) in downloads.items():
         print(format_string % (
             item,
             yes_no(info['downloaded']),
-            yes_no(info['metadata']['default']),
             info['metadata']["url"]))
 
 
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index aab2b0fe1874b288033afa4419de1919e29f4e1e..6dcd5e6328009baa6e4e3da4a806449f3fa83cfd 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -104,7 +104,6 @@ class EncodableSequences(object):
         -------
         numpy.array with shape (num sequences, max_length, m) where m is
         vector_encoding_length(vector_encoding_name)
-
         """
         cache_key = (
             "fixed_length_vector_encoding",
diff --git a/mhcflurry/ensemble_centrality.py b/mhcflurry/ensemble_centrality.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a812d7656b40099f8bec67e7a0d5832c9cc677f
--- /dev/null
+++ b/mhcflurry/ensemble_centrality.py
@@ -0,0 +1,37 @@
+"""
+Measures of centrality (e.g. mean) used to combine predictions across an
+ensemble. The input to these functions are log affinities, and they are expected
+to return a centrality measure also in log-space.
+"""
+
+import numpy
+from functools import partial
+
+
+def robust_mean(log_values):
+    """
+    Mean of values falling within the 25-75 percentiles.
+
+    Parameters
+    ----------
+    log_values : 2-d numpy.array
+        Center is computed along the second axis (i.e. per row).
+
+    Returns
+    -------
+    center : numpy.array of length log_values.shape[0]
+
+    """
+    if log_values.shape[1] <= 3:
+        # Too few values to use robust mean.
+        return numpy.nanmean(log_values, axis=1)
+    mask = (
+        (log_values <= numpy.nanpercentile(log_values, 75, axis=1).reshape((-1, 1))) &
+        (log_values >= numpy.nanpercentile(log_values, 25, axis=1).reshape((-1, 1))))
+    # numpy.where keeps NaNs (already excluded by the mask) from propagating.
+    return numpy.where(mask, log_values, 0).sum(1) / mask.sum(1)
+
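+# Worked example (hypothetical values): for a row [1.0, 2.0, 3.0, 4.0, 100.0],
+# the 25th and 75th percentiles are 2.0 and 4.0, so the mask keeps [2, 3, 4]
+# and robust_mean returns 3.0 where a plain mean would return 22.0.
+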
+CENTRALITY_MEASURES = {
+    "mean": partial(numpy.nanmean, axis=1),
+    "median": partial(numpy.nanmedian, axis=1),
+    "robust_mean": robust_mean,
+}
\ No newline at end of file
diff --git a/mhcflurry/predict_command.py b/mhcflurry/predict_command.py
index 3eb9f16fe2b6fb7d40e37001ee306bcfd3b5fa11..61f64e4ffd3413f95b7356c7c700fc4a6defa436 100644
--- a/mhcflurry/predict_command.py
+++ b/mhcflurry/predict_command.py
@@ -33,8 +33,9 @@ import logging
 
 import pandas
 
-from .downloads import get_path
+from .downloads import get_default_class1_models_dir
 from .class1_affinity_predictor import Class1AffinityPredictor
+from .version import __version__
 
 
 parser = argparse.ArgumentParser(
@@ -61,7 +62,11 @@ helper_args.add_argument(
     default=False,
     help="Prints the list of supported peptide lengths and exits"
 )
-
+helper_args.add_argument(
+    "--version",
+    action="version",
+    version="mhcflurry %s" % __version__,
+)
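+# e.g. "mhcflurry-predict --version" prints "mhcflurry 1.1.0" for this
+# release; argparse's version action prints the string and exits.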
 
 input_args = parser.add_argument_group(title="Required input arguments")
 input_args.add_argument(
@@ -109,6 +114,11 @@ output_args.add_argument(
     metavar="NAME",
     default="mhcflurry_",
     help="Prefix for output column names. Default: '%(default)s'")
+output_args.add_argument(
+    "--output-delimiter",
+    metavar="CHAR",
+    default=",",
+    help="Delimiter character for results. Default: '%(default)s'")
 
 
 model_args = parser.add_argument_group(title="Optional model settings")
@@ -117,7 +127,7 @@ model_args.add_argument(
     metavar="DIR",
     default=None,
     help="Directory containing models. "
-    "Default: %s" % get_path("models_class1", "models", test_exists=False))
+    "Default: %s" % get_default_class1_models_dir(test_exists=False))
 model_args.add_argument(
     "--include-individual-model-predictions",
     action="store_true",
@@ -129,12 +139,16 @@ model_args.add_argument(
 def run(argv=sys.argv[1:]):
     args = parser.parse_args(argv)
 
+    # It's hard to pass a tab in a shell, so we correct a common error:
+    if args.output_delimiter == "\\t":
+        args.output_delimiter = "\t"
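+    # For example, each of these (illustrative invocations) now yields
+    # tab-delimited output:
+    #     mhcflurry-predict ... --output-delimiter $'\t'   (shell tab)
+    #     mhcflurry-predict ... --output-delimiter '\t'    (corrected here)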
+
     models_dir = args.models
     if models_dir is None:
         # The reason we set the default here instead of in the argument parser is that
         # we want to test_exists at this point, so the user gets a message instructing
         # them to download the models if needed.
-        models_dir = get_path("models_class1", "models")
+        models_dir = get_default_class1_models_dir(test_exists=True)
     predictor = Class1AffinityPredictor.load(models_dir)
 
     # The following two are informative commands that can come 
@@ -200,7 +214,7 @@ def run(argv=sys.argv[1:]):
             df[args.prediction_column_prefix + col] = predictions[col]
 
     if args.out:
-        df.to_csv(args.out, index=False)
+        df.to_csv(args.out, index=False, sep=args.output_delimiter)
         print("Wrote: %s" % args.out)
     else:
-        df.to_csv(sys.stdout, index=False)
+        df.to_csv(sys.stdout, index=False, sep=args.output_delimiter)
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index 6602761bf7c6a096d7c23438543f297fb8356bb5..30a8ae6934461631bec68fad5d8c6fcb4467cc07 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -8,13 +8,31 @@ import sys
 import time
 import traceback
 from multiprocessing import Pool
+from functools import partial
+from pprint import pprint
 
 import pandas
 import yaml
 from mhcnames import normalize_allele_name
+import tqdm  # progress bar
 
-from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
-from mhcflurry.common import configure_logging
+from .class1_affinity_predictor import Class1AffinityPredictor
+from .common import configure_logging, set_keras_backend
+
+
+# To avoid pickling large matrices to send to child processes when running in
+# parallel, we use this global variable as a place to store data. Data stored
+# here before the worker pool is created will be inherited by the child
+# processes upon fork(), allowing us to share large data with the workers
+# efficiently.
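+#
+# A minimal sketch of the pattern (illustrative names):
+#
+#     GLOBAL_DATA["train_data"] = big_df   # set before Pool() is created
+#     pool = Pool(processes=4)             # children inherit GLOBAL_DATA
+#     pool.map(worker, light_work_items)   # workers read it; nothing pickled
+#
+# This relies on fork() semantics, so it does not apply under the "spawn"
+# start method.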
+GLOBAL_DATA = {}
+
+
+# Note on parallelization:
+# Currently (tensorflow==1.4.1) it appears essential that no processes are
+# forked after tensorflow has been used at all, which includes merely
+# importing keras.backend. So we must make sure not to use tensorflow in the
+# main process if we are running in parallel.
 
 parser = argparse.ArgumentParser(usage=__doc__)
 
@@ -52,6 +70,11 @@ parser.add_argument(
     action="store_true",
     default=False,
     help="Use only quantitative training data")
+parser.add_argument(
+    "--ignore-inequalities",
+    action="store_true",
+    default=False,
+    help="Do not use affinity value inequalities even when present in data")
 parser.add_argument(
     "--percent-rank-calibration-num-peptides-per-length",
     type=int,
@@ -79,22 +102,33 @@ parser.add_argument(
     help="Keras verbosity. Default: %(default)s",
     default=0)
 parser.add_argument(
-    "--parallelization-num-jobs",
-    default=1,
+    "--num-jobs",
+    default=[1],
     type=int,
     metavar="N",
-    help="Parallelization jobs. Experimental. Does NOT work with tensorflow. "
-    "Set to 1 for serial run. Set to 0 to use number of cores. "
-    "Default: %(default)s.")
+    nargs="+",
+    help="Number of processes to parallelize training and percent rank "
+    "calibration over, respectively. Experimental. If only one value is specified "
+    "then the same number of jobs is used for both phases."
+    "Set to 1 for serial run. Set to 0 to use number of cores. Default: %(default)s.")
+parser.add_argument(
+    "--backend",
+    choices=("tensorflow-gpu", "tensorflow-cpu"),
+    help="Keras backend. If not specified will use system default.")
 
 
 def run(argv=sys.argv[1:]):
+    global GLOBAL_DATA
+
     # On sigusr1 print stack trace
     print("To show stack trace, run:\nkill -s USR1 %d" % os.getpid())
     signal.signal(signal.SIGUSR1, lambda sig, frame: traceback.print_stack())
 
     args = parser.parse_args(argv)
 
+    if args.backend:
+        set_keras_backend(args.backend)
+
     configure_logging(verbose=args.verbosity > 1)
 
     hyperparameters_lst = yaml.load(open(args.hyperparameters))
@@ -115,6 +149,10 @@ def run(argv=sys.argv[1:]):
         ]
         print("Subselected to quantitative: %s" % (str(df.shape)))
 
+    if args.ignore_inequalities and "measurement_inequality" in df.columns:
+        print("Dropping measurement_inequality column")
+        del df["measurement_inequality"]
+
     allele_counts = df.allele.value_counts()
 
     if args.allele:
@@ -130,15 +168,18 @@ def run(argv=sys.argv[1:]):
     print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles)))
     print("Training data: %s" % (str(df.shape)))
 
+    GLOBAL_DATA["train_data"] = df
+
     predictor = Class1AffinityPredictor()
-    if args.parallelization_num_jobs == 1:
+    if args.num_jobs[0] == 1:
         # Serial run
+        print("Running in serial.")
         worker_pool = None
     else:
         worker_pool = Pool(
             processes=(
-                args.parallelization_num_jobs
-                if args.parallelization_num_jobs else None))
+                args.num_jobs[0]
+                if args.num_jobs[0] else None))
         print("Using worker pool: %s" % str(worker_pool))
 
     if args.out_models_dir and not os.path.exists(args.out_models_dir):
@@ -146,6 +187,8 @@ def run(argv=sys.argv[1:]):
         os.mkdir(args.out_models_dir)
         print("Done.")
 
+    start = time.time()
+    work_items = []
     for (h, hyperparameters) in enumerate(hyperparameters_lst):
         n_models = None
         if 'n_models' in hyperparameters:
@@ -158,10 +201,7 @@ def run(argv=sys.argv[1:]):
         if args.max_epochs:
             hyperparameters['max_epochs'] = args.max_epochs
 
-        work_items = []
-        total_data_to_train_on = 0
-        for (i, (allele, sub_df)) in enumerate(df.groupby("allele")):
-            total_data_to_train_on += len(sub_df) * n_models
+        for (i, allele) in enumerate(df.allele.unique()):
             for model_group in range(n_models):
                 work_dict = {
                     'model_group': model_group,
@@ -171,7 +211,7 @@ def run(argv=sys.argv[1:]):
                     'hyperparameter_set_num': h,
                     'num_hyperparameter_sets': len(hyperparameters_lst),
                     'allele': allele,
-                    'data': sub_df,
+                    'data': None,  # subselect from GLOBAL_DATA["train_data"]
                     'hyperparameters': hyperparameters,
                     'verbose': args.verbosity,
                     'predictor': predictor if not worker_pool else None,
@@ -179,55 +219,114 @@ def run(argv=sys.argv[1:]):
                 }
                 work_items.append(work_dict)
 
-        if worker_pool:
-            print("Processing %d work items in parallel." % len(work_items))
-            predictors = worker_pool.map(work_entrypoint, work_items, chunksize=1)
-            print("Merging %d predictors fit in parallel." % (len(predictors)))
-            predictor = Class1AffinityPredictor.merge([predictor] + predictors)
-            print("Saving merged predictor to: %s" % args.out_models_dir)
-            predictor.save(args.out_models_dir)
-        else:
-            # Run in serial. In this case, every worker is passed the same predictor,
-            # which it adds models to, so no merging is required. It also saves
-            # as it goes so no saving is required at the end.
-            start = time.time()
-            data_trained_on = 0
-            while work_items:
-                item = work_items.pop(0)
-                work_predictor = work_entrypoint(item)
-                assert work_predictor is predictor
-
-                # When running in serial we try to estimate time remaining.
-                data_trained_on += len(item['data'])
-                progress = float(data_trained_on) / total_data_to_train_on
-                time_elapsed = time.time() - start
-                total_time = time_elapsed / progress
-                print(
-                    "Estimated total training time: %0.2f min, "
-                    "remaining: %0.2f min" % (
-                        total_time / 60,
-                        (total_time - time_elapsed) / 60))
+    if worker_pool:
+        print("Processing %d work items in parallel." % len(work_items))
+
+        # We sort here so the predictors are in order of hyperparameter set num.
+        # This is convenient so that the neural networks get merged for each
+        # allele in the same order.
+        predictors = [
+            predictor for (_, predictor)
+            in sorted(
+                tqdm.tqdm(
+                    worker_pool.imap_unordered(
+                        train_model_entrypoint, work_items, chunksize=1),
+                    total=len(work_items)),
+                key=lambda pair: pair[0])
+        ]
 
+        print("Merging %d predictors fit in parallel." % (len(predictors)))
+        predictor = Class1AffinityPredictor.merge([predictor] + predictors)
+        print("Saving merged predictor to: %s" % args.out_models_dir)
+        predictor.save(args.out_models_dir)
+    else:
+        # Run in serial. In this case, every worker is passed the same predictor,
+        # which it adds models to, so no merging is required. It also saves
+        # as it goes so no saving is required at the end.
+        for _ in tqdm.trange(len(work_items)):
+            item = work_items.pop(0)  # pop as we go so memory is freed sooner
+            (_, work_predictor) = train_model_entrypoint(item)
+            assert work_predictor is predictor
+        assert not work_items
+
+    print("*" * 30)
+    training_time = time.time() - start
+    print("Trained affinity predictor with %d networks in %0.2f min." % (
+        len(predictor.neural_networks), training_time / 60.0))
+    print("*" * 30)
 
     if worker_pool:
         worker_pool.close()
         worker_pool.join()
+        worker_pool = None
 
+    start = time.time()
     if args.percent_rank_calibration_num_peptides_per_length > 0:
-        start = time.time()
-        print("Performing percent rank calibration.")
-        predictor.calibrate_percentile_ranks(
+        alleles = list(predictor.supported_alleles)
+
+        print("Performing percent rank calibration. Encoding peptides.")
+        encoded_peptides = predictor.calibrate_percentile_ranks(
+            alleles=[],  # don't actually do any calibration, just return peptides
             num_peptides_per_length=args.percent_rank_calibration_num_peptides_per_length)
-        print("Finished calibrating percent ranks in %0.2f sec." % (
+
+        # Now we encode the peptides for each neural network, so the encoding
+        # becomes cached.
+        for network in predictor.neural_networks:
+            network.peptides_to_network_input(encoded_peptides)
+        assert encoded_peptides.encoding_cache  # must have cached the encoding
+        print("Finished encoding peptides for percent ranks in %0.2f sec." % (
             time.time() - start))
+        print("Calibrating percent rank calibration for %d alleles." % len(alleles))
+
+        if args.num_jobs[-1] == 1:
+            # Serial run
+            print("Running in serial.")
+            worker_pool = None
+            results = (
+                calibrate_percentile_ranks(
+                    allele=allele,
+                    predictor=predictor,
+                    peptides=encoded_peptides)
+                for allele in alleles)
+        else:
+            # Parallel run
+            # Store peptides in a global variable so the forked workers
+            # inherit them (copy-on-write) instead of receiving them pickled.
+            GLOBAL_DATA["calibration_peptides"] = encoded_peptides
+            worker_pool = Pool(
+                processes=(
+                    args.num_jobs[-1]
+                    if args.num_jobs[-1] else None))
+            print("Using worker pool: %s" % str(worker_pool))
+            results = worker_pool.imap_unordered(
+                partial(
+                    calibrate_percentile_ranks,
+                    predictor=predictor),
+                alleles,
+                chunksize=1)
+
+        for result in tqdm.tqdm(results, total=len(alleles)):
+            predictor.allele_to_percent_rank_transform.update(result)
+
+        print("Done calibrating %d additional alleles." % len(alleles))
         predictor.save(args.out_models_dir, model_names_to_write=[])
 
+    percent_rank_calibration_time = time.time() - start
 
-def work_entrypoint(item):
-    return process_work(**item)
+    if worker_pool:
+        worker_pool.close()
+        worker_pool.join()
+
+    print("Train time: %0.2f min. Percent rank calibration time: %0.2f min." % (
+        training_time / 60.0, percent_rank_calibration_time / 60.0))
+    print("Predictor written to: %s" % args.out_models_dir)
+
+
+def train_model_entrypoint(item):
+    return train_model(**item)
 
 
-def process_work(
+def train_model(
         model_group,
         n_models,
         allele_num,
@@ -244,6 +343,10 @@ def process_work(
     if predictor is None:
         predictor = Class1AffinityPredictor()
 
+    if data is None:
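+        # In parallel runs, data is passed as None and subselected here from
+        # the fork-inherited GLOBAL_DATA, avoiding pickling per-allele frames.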
+        full_data = GLOBAL_DATA["train_data"]
+        data = full_data.loc[full_data.allele == allele]
+
     progress_preamble = (
         "[%2d / %2d hyperparameters] "
         "[%4d / %4d alleles] "
@@ -259,22 +362,42 @@ def process_work(
     train_data = data.sample(frac=1.0)
     (model,) = predictor.fit_allele_specific_predictors(
         n_models=1,
-        architecture_hyperparameters=hyperparameters,
+        architecture_hyperparameters_list=[hyperparameters],
         allele=allele,
         peptides=train_data.peptide.values,
         affinities=train_data.measurement_value.values,
+        inequalities=(
+            train_data.measurement_inequality.values
+            if "measurement_inequality" in train_data.columns else None),
         models_dir_for_save=save_to,
         progress_preamble=progress_preamble,
         verbose=verbose)
 
     if allele_num == 0 and model_group == 0:
         # For the first model for the first allele, print the architecture.
+        print("*** HYPERPARAMETER SET %d***" %
+              (hyperparameter_set_num + 1))
+        pprint(hyperparameters)
         print("*** ARCHITECTURE FOR HYPERPARAMETER SET %d***" %
               (hyperparameter_set_num + 1))
         model.network(borrow=True).summary()
 
-    return predictor
-
+    return (hyperparameter_set_num, predictor)
+
+
+def calibrate_percentile_ranks(allele, predictor, peptides=None):
+    """
+    Private helper function.
+    """
+    global GLOBAL_DATA
+    if peptides is None:
+        peptides = GLOBAL_DATA["calibration_peptides"]
+    predictor.calibrate_percentile_ranks(
+        peptides=peptides,
+        alleles=[allele])
+    return {
+        allele: predictor.allele_to_percent_rank_transform[allele],
+    }
 
 
 if __name__ == '__main__':
diff --git a/mhcflurry/version.py b/mhcflurry/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..6849410aae0a8010e76d5f0a44ced13d750b0989
--- /dev/null
+++ b/mhcflurry/version.py
@@ -0,0 +1 @@
+__version__ = "1.1.0"
diff --git a/requirements.txt b/requirements.txt
index 746d97f072e3bc272758b9eb35c82e2e2f7040d0..da0ceda3a648a43ba10b5d6b82c760e5152c1574 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ appdirs
 scikit-learn
 mhcnames
 pyyaml
+tqdm
diff --git a/setup.py b/setup.py
index 0a2f0e49f105d8b18278eb7031ec1e8d163c32ba..9076caf1e6b6dc55053783d966cb3c9b5d68d912 100644
--- a/setup.py
+++ b/setup.py
@@ -37,10 +37,10 @@ try:
     import pypandoc
     readme = pypandoc.convert(readme, to='rst', format='md')
 except:
-    logging.warn("Conversion of long_description from MD to RST failed")
+    logging.warning("Conversion of long_description from MD to RST failed")
     pass
 
-with open('mhcflurry/__init__.py', 'r') as f:
+with open('mhcflurry/version.py', 'r') as f:
     version = re.search(
         r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
         f.read(),
@@ -57,6 +57,7 @@ if __name__ == '__main__':
         'scikit-learn',
         'mhcnames',
         'pyyaml',
+        'tqdm',
     ]
     if PY2:
         # concurrent.futures is a standard library in Py3 but Py2
diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py
index 65f1f587316b9c56c1c30dfa0ff4c828c88c9648..40ac8c2f9ea22b1b6ad80495da061bd6b0acf5c5 100644
--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -51,7 +51,7 @@ def test_a1_known_epitopes_in_newly_trained_model():
     allele = "HLA-A*01:01"
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         (df.allele == allele) &
         (df.peptide.str.len() >= 8) &
@@ -92,10 +92,11 @@ def test_a1_known_epitopes_in_newly_trained_model():
     predictor = Class1AffinityPredictor()
     predictor.fit_allele_specific_predictors(
         n_models=2,
-        architecture_hyperparameters=hyperparameters,
+        architecture_hyperparameters_list=[hyperparameters],
         allele=allele,
         peptides=df.peptide.values,
         affinities=df.measurement_value.values,
+        verbose=0,
     )
 
     predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor)
@@ -136,7 +137,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
 
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         df.allele == allele
     ]
@@ -153,10 +154,11 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
     predictor = Class1AffinityPredictor()
     predictor.fit_allele_specific_predictors(
         n_models=2,
-        architecture_hyperparameters=hyperparameters,
+        architecture_hyperparameters_list=[hyperparameters],
         allele=allele,
         peptides=df.peptide.values,
         affinities=df.measurement_value.values,
+        verbose=0,
     )
     predictor.calibrate_percentile_ranks(num_peptides_per_length=1000)
     ic50_pred = predictor.predict(df.peptide.values, allele=allele)
diff --git a/test/test_class1_neural_network.py b/test/test_class1_neural_network.py
index 835c632d7c2c4dc6cbe1f454bc8e04ee754229d2..b082f28cf89276bf842cc427d98ad7defab0985a 100644
--- a/test/test_class1_neural_network.py
+++ b/test/test_class1_neural_network.py
@@ -1,16 +1,20 @@
+from nose.tools import eq_, assert_less, assert_greater, assert_almost_equal
+
 import numpy
 import pandas
-numpy.random.seed(0)
+from numpy import testing
 
-from mhcflurry.class1_neural_network import Class1NeuralNetwork
+numpy.random.seed(0)
 
-from nose.tools import eq_
-from numpy import testing
+import logging
+logging.getLogger('tensorflow').disabled = True
 
+from mhcflurry.class1_neural_network import Class1NeuralNetwork
 from mhcflurry.downloads import get_path
+from mhcflurry.common import random_peptides
 
 
-def test_class1_affinity_predictor_a0205_training_accuracy():
+def test_class1_neural_network_a0205_training_accuracy():
     # Memorize the dataset.
     hyperparameters = dict(
         activation="tanh",
@@ -33,7 +37,7 @@ def test_class1_affinity_predictor_a0205_training_accuracy():
 
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         df.allele == allele
     ]
@@ -77,6 +81,109 @@ def test_class1_affinity_predictor_a0205_training_accuracy():
         dense_layer_l1_regularization=0.0,
         dropout_probability=0.0)
     predictor2 = Class1NeuralNetwork(**hyperparameters2)
-    predictor2.fit(df.peptide.values, df.measurement_value.values)
+    predictor2.fit(df.peptide.values, df.measurement_value.values, verbose=0)
     eq_(predictor.network().to_json(), predictor2.network().to_json())
 
+
+def test_inequalities():
+    # Memorize the dataset.
+    hyperparameters = dict(
+        loss="custom:mse_with_inequalities",
+        activation="tanh",
+        layer_sizes=[16],
+        max_epochs=50,
+        early_stopping=False,
+        validation_split=0.0,
+        locally_connected_layers=[
+            {
+                "filters": 8,
+                "activation": "tanh",
+                "kernel_size": 3
+            }
+        ],
+        dense_layer_l1_regularization=0.0,
+        dropout_probability=0.0)
+
+    df = pandas.DataFrame()
+    df["peptide"] = random_peptides(1000, length=9)
+
+    # First half are binders
+    df["binder"] = df.index < len(df) / 2
+    df["value"] = df.binder.map({True: 100, False: 5000})
+    df.loc[:10, "value"] = 1.0  # some strong binders
+    df["inequality1"] = "="
+    df["inequality2"] = df.binder.map({True: "<", False: "="})
+    df["inequality3"] = df.binder.map({True: "=", False: ">"})
+
+    # "A" at start of peptide indicates strong binder
+    df["peptide"] = [
+        ("C" if not row.binder else "A") + row.peptide[1:]
+        for _, row in df.iterrows()
+    ]
+
+    fit_kwargs = {'verbose': 0}
+
+    # Prediction1 uses no inequalities (i.e. all are (=))
+    predictor = Class1NeuralNetwork(**hyperparameters)
+    predictor.fit(
+        df.peptide.values,
+        df.value.values,
+        inequalities=df.inequality1.values,
+        **fit_kwargs)
+    df["prediction1"] = predictor.predict(df.peptide.values)
+
+    # Prediction2 has a (<) inequality on binders and an (=) on non-binders
+    predictor = Class1NeuralNetwork(**hyperparameters)
+    predictor.fit(
+        df.peptide.values,
+        df.value.values,
+        inequalities=df.inequality2.values,
+        **fit_kwargs)
+    df["prediction2"] = predictor.predict(df.peptide.values)
+
+    # Prediction3 has a (=) inequality on binders and a (>) on non-binders
+    predictor = Class1NeuralNetwork(**hyperparameters)
+    predictor.fit(
+        df.peptide.values,
+        df.value.values,
+        inequalities=df.inequality3.values,
+        **fit_kwargs)
+    df["prediction3"] = predictor.predict(df.peptide.values)
+
+    df_binders = df.loc[df.binder]
+    df_nonbinders = df.loc[~df.binder]
+
+    print("***** Binders: *****")
+    print(df_binders.head(5))
+
+    print("***** Non-binders: *****")
+    print(df_nonbinders.head(5))
+
+    # Binders should always be given tighter predicted affinity than non-binders
+    assert_less(df_binders.prediction1.mean(), df_nonbinders.prediction1.mean())
+    assert_less(df_binders.prediction2.mean(), df_nonbinders.prediction2.mean())
+    assert_less(df_binders.prediction3.mean(), df_nonbinders.prediction3.mean())
+
+    # prediction2 binders should be tighter on average than prediction1
+    # binders, since prediction2 has a (<) inequality for binders.
+    # Non-binders should be about the same between prediction2 and prediction1
+    assert_less(df_binders.prediction2.mean(), df_binders.prediction1.mean())
+    assert_almost_equal(
+        df_nonbinders.prediction2.mean(),
+        df_nonbinders.prediction1.mean(),
+        delta=3000)
+
+    # prediction3 non-binders should be weaker on average than prediction2 (or 1)
+    # non-binders, since prediction3 has a (>) inequality for these peptides.
+    # Binders should be about the same.
+    assert_greater(
+        df_nonbinders.prediction3.mean(),
+        df_nonbinders.prediction2.mean())
+    assert_greater(
+        df_nonbinders.prediction3.mean(),
+        df_nonbinders.prediction1.mean())
+    assert_almost_equal(
+        df_binders.prediction3.mean(),
+        df_binders.prediction1.mean(),
+        delta=3000)
+
diff --git a/test/test_custom_loss.py b/test/test_custom_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3644c66652136ca7c9f05299f1a20e6995fc8fd
--- /dev/null
+++ b/test/test_custom_loss.py
@@ -0,0 +1,72 @@
+from nose.tools import eq_, assert_less, assert_greater, assert_almost_equal
+
+import numpy
+
+numpy.random.seed(0)
+
+import logging
+logging.getLogger('tensorflow').disabled = True
+
+import keras.backend as K
+
+from mhcflurry.custom_loss import CUSTOM_LOSSES
+
+
+def evaluate_loss(loss, y_true, y_pred):
+    if K.backend() == "tensorflow":
+        session = K.get_session()
+        y_true_var = K.constant(y_true, name="y_true")
+        y_pred_var = K.constant(y_pred, name="y_pred")
+        result = loss(y_true_var, y_pred_var)
+        return result.eval(session=session)
+    elif K.backend() == "theano":
+        y_true_var = K.constant(y_true, name="y_true")
+        y_pred_var = K.constant(y_pred, name="y_pred")
+        result = loss(y_true_var, y_pred_var)
+        return result.eval()
+    else:
+        raise ValueError("Unsupported backend: %s" % K.backend())
+
+
+def test_mse_with_inequalities():
+
+    loss_obj = CUSTOM_LOSSES['mse_with_inequalities']
+
+    y_values = [0.0, 0.5, 0.8, 1.0]
+
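+    # Checked below: "=" always contributes squared error; ">" penalizes only
+    # predictions below the target; "<" penalizes only predictions above it.
+    # Predictions consistent with their inequalities incur zero loss.
+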
+    adjusted_y = loss_obj.encode_y(y_values)
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, y_values)
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, [">", ">", ">", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, y_values)
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, ["<", "<", "<", "<"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, y_values)
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, ["=", "<", "=", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, y_values)
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, ["=", "<", "=", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, [0.0, 0.4, 0.8, 1.0])
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, [">", "<", ">", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, [0.1, 0.4, 0.9, 1.0])
+    eq_(loss0, 0.0)
+
+    adjusted_y = loss_obj.encode_y(y_values, [">", "<", ">", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, [0.1, 0.6, 0.9, 1.0])
+    assert_greater(loss0, 0.0)
+
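+    # Expected values below assume the loss sums squared violations over the
+    # batch: ["=", "<", ">", ">"] vs [0.1, 0.6, 0.9, 1.0] gives 0.1**2 (the
+    # "=" at 0.0) + 0.1**2 (the "<" at 0.5, exceeded by 0.6) = 0.02; the next
+    # case changes the third entry to "=", adding another 0.1**2 for 0.03.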
+    adjusted_y = loss_obj.encode_y(y_values, ["=", "<", ">", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, [0.1, 0.6, 0.9, 1.0])
+    assert_almost_equal(loss0, 0.02)
+
+    adjusted_y = loss_obj.encode_y(y_values, ["=", "<", "=", ">"])
+    loss0 = evaluate_loss(loss_obj.loss, adjusted_y, [0.1, 0.6, 0.9, 1.0])
+    assert_almost_equal(loss0, 0.03)
+
diff --git a/test/test_train_allele_specific_models_command.py b/test/test_train_allele_specific_models_command.py
index affcde7569b0ae5766ae554f50a73122f3e3894f..12fcaa3d07614804ff004ee6110c5587163bb70d 100644
--- a/test/test_train_allele_specific_models_command.py
+++ b/test/test_train_allele_specific_models_command.py
@@ -57,12 +57,13 @@ def run_and_check(n_jobs=0):
         json.dump(HYPERPARAMETERS, fd)
 
     args = [
-        "--data", get_path("data_curated", "curated_training_data.csv.bz2"),
+        "--data", get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2"),
         "--hyperparameters", hyperparameters_filename,
         "--allele", "HLA-A*02:01", "HLA-A*01:01", "HLA-A*03:01",
         "--out-models-dir", models_dir,
         "--percent-rank-calibration-num-peptides-per-length", "10000",
-        "--parallelization-num-jobs", str(n_jobs),
+        "--num-jobs", str(n_jobs),
+        "--ignore-inequalities",
     ]
     print("Running with args: %s" % args)
     train_allele_specific_models_command.run(args)
@@ -82,6 +83,7 @@ def run_and_check(n_jobs=0):
     print("Deleting: %s" % models_dir)
     shutil.rmtree(models_dir)
 
+
 def Xtest_run_parallel():
     run_and_check(n_jobs=3)