diff --git a/docs/Makefile b/docs/Makefile
index 3dfdf85d46949236d6a6fcd33a897ee201828804..8c1bf9d5c429ffa198fed21a652998f6f5f528e5 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -56,7 +56,6 @@ generate:
 	mhcflurry-downloads fetch models_class1 cross_validation_class1
 	python generate.py \
 	    --out-models-cv-rst _build/_models_cv.rst \
-	    --out-models-architecture-png _build/_models_architecture.png \
 	    --out-models-info-rst _build/_models_info.rst \
 	    --out-models-supported-alleles-rst _build/_models_supported_alleles.rst
 
diff --git a/docs/commandline_tutorial.rst b/docs/commandline_tutorial.rst
index 6dc803c81b40657c2be83f8d5106a46654fb1c73..ade306d6f4a781df3080cad53547a5e6eee37f29 100644
--- a/docs/commandline_tutorial.rst
+++ b/docs/commandline_tutorial.rst
@@ -111,7 +111,7 @@ training data. The data we use for our released predictors can be downloaded wit
 It looks like this:
 
 .. command-output::
-    bzcat "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" | head -n 3
+    bzcat "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" | head -n 3
     :shell:
     :nostderr:
 
diff --git a/docs/conf.py b/docs/conf.py
index be1d9ae6aa14dab0664790112c8df8957fdd7682..98df4244ca721115c27b62f72aea8c2bf9f6613a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -86,7 +86,7 @@ author = 'Timothy O\'Donnell'
 
 # The short X.Y version.
 # Added by Tim: reading version from mhcflurry __init__.py as in setup.py
-with open('../mhcflurry/__init__.py', 'r') as f:
+with open('../mhcflurry/version.py', 'r') as f:
     version = re.search(
         r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
         f.read(),
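
For reference, a minimal self-contained sketch of the single-sourcing pattern
conf.py relies on, assuming mhcflurry/version.py defines __version__ as a
plain string literal (e.g. __version__ = "1.2.0")::

    import re

    # Read the package version without importing the package. The relative
    # path assumes this runs from the docs/ directory, as conf.py does.
    with open('../mhcflurry/version.py', 'r') as f:
        match = re.search(
            r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
            f.read(),
            re.MULTILINE)
    version = match.group(1)  # the short version string, e.g. "1.2.0"
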
diff --git a/docs/generate.py b/docs/generate.py
index 848d04ce5a15c0d8391a4c1bd81e90b4a5229939..fb39ade21ba133c8775cf6a6d5c06bbdd9db3b60 100644
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -4,11 +4,14 @@ Generate certain RST files used in documentation.
 
 import sys
 import argparse
+import json
 from textwrap import wrap
+from collections import OrderedDict
 
 import pypandoc
 import pandas
 from keras.utils.vis_utils import plot_model
+from tabulate import tabulate
 
 from mhcflurry import __version__
 from mhcflurry.downloads import get_path
@@ -89,18 +92,66 @@ def go(argv):
         # Architecture information rst
         if predictor is None:
             predictor = Class1AffinityPredictor.load(args.class1_models_dir)
-        network = predictor.neural_networks[0].network()
-        lines = []
-        network.summary(print_fn=lines.append)
+
+        representative_networks = OrderedDict()
+        for network in predictor.neural_networks:
+            config = json.dumps(network.hyperparameters, sort_keys=True)
+            if config not in representative_networks:
+                representative_networks[config] = network
+
+        all_hyperparameters = [
+            network.hyperparameters for network in representative_networks.values()
+        ]
+        hyperparameter_keys = all_hyperparameters[0].keys()
+        assert all(
+            hyperparameters.keys() == hyperparameter_keys
+            for hyperparameters in all_hyperparameters)
+
+        constant_hyperparameter_keys = [
+            k for k in hyperparameter_keys
+            if all([
+                hyperparameters[k] == all_hyperparameters[0][k]
+                for hyperparameters in all_hyperparameters
+            ])
+        ]
+        constant_hyperparameters = dict(
+            (key, all_hyperparameters[0][key])
+            for key in sorted(constant_hyperparameter_keys)
+        )
+
+        def write_hyperparameters(fd, hyperparameters):
+            rows = []
+            for key in sorted(hyperparameters.keys()):
+                rows.append((key, json.dumps(hyperparameters[key])))
+            fd.write("\n")
+            fd.write(
+                tabulate(rows, ["Hyperparameter", "Value"], tablefmt="grid"))
 
         with open(args.out_models_info_rst, "w") as fd:
-            fd.write("Layers and parameters summary: ")
-            fd.write("\n\n::\n\n")
-            for line in lines:
-                fd.write("    ")
-                fd.write(line)
+            fd.write("Hyperparameters shared by all %d architectures:\n" %
+                len(representative_networks))
+            write_hyperparameters(fd, constant_hyperparameters)
+            fd.write("\n")
+            for (i, network) in enumerate(representative_networks.values()):
+                lines = []
+                network.network().summary(print_fn=lines.append)
+
+                fd.write("Architecture %d / %d:\n" % (
+                    (i + 1, len(representative_networks))))
+                fd.write("+" * 40)
                 fd.write("\n")
-            print("Wrote: %s" % args.out_models_info_rst)
+                write_hyperparameters(
+                    fd,
+                    dict(
+                        (key, value)
+                        for (key, value) in network.hyperparameters.items()
+                        if key not in constant_hyperparameters))
+                fd.write("\n\n::\n\n")
+                for line in lines:
+                    fd.write("    ")
+                    fd.write(line)
+                    fd.write("\n")
+        print("Wrote: %s" % args.out_models_info_rst)
 
     if args.out_models_cv_rst:
         # Models cv output
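
generate.py now leans on two small pieces of machinery: deduplicating networks
by a JSON serialization of their hyperparameters, and rendering hyperparameters
as RST grid tables with tabulate. A standalone sketch of both, using
hypothetical dicts in place of Class1NeuralNetwork.hyperparameters::

    import json
    from collections import OrderedDict

    from tabulate import tabulate

    # Hypothetical hyperparameter dicts; the first two share a configuration.
    networks = [
        {"dropout": 0.0, "layer_sizes": [64]},
        {"dropout": 0.0, "layer_sizes": [64]},
        {"dropout": 0.5, "layer_sizes": [16]},
    ]

    # Keep the first network seen for each distinct configuration.
    # sort_keys=True makes the key insensitive to dict ordering.
    representatives = OrderedDict()
    for hyperparameters in networks:
        key = json.dumps(hyperparameters, sort_keys=True)
        if key not in representatives:
            representatives[key] = hyperparameters
    print(len(representatives))  # -> 2 distinct architectures

    # Render one configuration as an RST grid table, as generate.py does.
    first = next(iter(representatives.values()))
    rows = [(key, json.dumps(value)) for (key, value) in sorted(first.items())]
    print(tabulate(rows, ["Hyperparameter", "Value"], tablefmt="grid"))
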
diff --git a/docs/models.rst b/docs/models.rst
index 4f7dee9edb93dcf686b830795aad79bf93c74bd7..96f2d1243e8975a3d6a3b31d7a88bbdf903680f8 100644
--- a/docs/models.rst
+++ b/docs/models.rst
@@ -1,35 +1,28 @@
 Details on the released models
 ===============================
 
-The released MHCflurry predictor consists of an ensemble of eight models for each
-supported allele. Each model in the ensemble was trained on a random 80% sample
-of the data for the allele, and the remaining 20% was used for early stopping.
-All models use the same architecture. The predictions are taken to be the geometric
-mean of the nM binding affinity predictions of the individual models. The script
-we run to train these models is in "downloads-generation/models_class1/GENERATE.sh"
-in the repository.
-
-Neural network architecture
+The released MHCflurry predictor consists of an ensemble of models for each
+supported allele. Each model in the ensemble was trained on a random 90% sample
+of the data for the allele, and the remaining data was used for early stopping.
+The ensemble prediction for a given input is the geometric mean of the
+individual models' nM binding affinity predictions, after discarding
+predictions outside the middle 50% of values. The script we run to train these
+models is in "downloads-generation/models_class1/GENERATE.sh" in the repository.
+
+Neural network architectures
 -------------------------------------------------------------
 
-The neural network architecture is quite simple, consisting of a locally
-connected layer, a dense layer, and a sigmoid output.
-
 .. include:: /_build/_models_info.rst
 
-Architecture diagram:
-
-.. image:: /_build/_models_architecture.png
-
-Cross validation performance
--------------------------------------------------------------
+.. Cross validation performance
+.. -------------------------------------------------------------
 
-The accuracy of the MHCflurry downloadable models was estimated using 5-fold cross
-validation on the training data. The values shown here are the mean cross validation
-scores across folds.
+.. The accuracy of the MHCflurry downloadable models was estimated using 5-fold cross
+.. validation on the training data. The values shown here are the mean cross validation
+.. scores across folds.
 
-The AUC and F1 estimates use a 500 nM cutoff for distinguishing strong-binders
-from weak- or non-binders. The Kendall Tau score gives the rank correlation
-between the predicted and measured affinities; it uses no cutoff.
+.. The AUC and F1 estimates use a 500 nM cutoff for distinguishing strong-binders
+.. from weak- or non-binders. The Kendall Tau score gives the rank correlation
+.. between the predicted and measured affinities; it uses no cutoff.
 
-.. include:: /_build/_models_cv.rst
+.. .. include:: /_build/_models_cv.rst
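
The middle-50% aggregation rule described in models.rst can be made concrete
with a short sketch; this is a simplified illustration of the stated rule, not
mhcflurry's actual implementation::

    import numpy

    def ensemble_prediction(predictions_nm):
        # Keep only predictions between the 25th and 75th percentiles
        # (the middle 50%), then take their geometric mean.
        predictions_nm = numpy.array(predictions_nm, dtype=float)
        (low, high) = numpy.percentile(predictions_nm, [25, 75])
        middle = predictions_nm[
            (predictions_nm >= low) & (predictions_nm <= high)]
        return numpy.exp(numpy.log(middle).mean())

    # The 80 nM and 5000 nM extremes fall outside the middle 50%.
    print(ensemble_prediction([80.0, 100.0, 120.0, 5000.0]))  # ~109.5
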
diff --git a/docs/python_tutorial.rst b/docs/python_tutorial.rst
index 9b3d8b882a91bea6899cb6583382d691f717c7e8..5840db46484f64b430a96fb9ec86694af0328a01 100644
--- a/docs/python_tutorial.rst
+++ b/docs/python_tutorial.rst
@@ -65,7 +65,7 @@ We can get the path to this data from Python using `mhcflurry.downloads.get_path
 .. runblock:: pycon
 
     >>> from mhcflurry.downloads import get_path
-    >>> data_path = get_path("data_curated", "curated_training_data.csv.bz2")
+    >>> data_path = get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2")
     >>> data_path
 
 Now let's load it with pandas and filter to reasonably-sized peptides:
diff --git a/docs/requirements.txt b/docs/requirements.txt
index a88547504c1d7ede3af6ca6d67010fc0614ade96..af205bbbf1b5613450cffece92b58334dd06ea34 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,3 +7,4 @@ numpydoc
 pypandoc
 mhctools
 pydot
+tabulate
diff --git a/mhcflurry/class1_neural_network.py b/mhcflurry/class1_neural_network.py
index c48b051ff22600e593676190226841eeb2f6df56..936e75119f6974fb6b619ad4f57461a9a7e50339 100644
--- a/mhcflurry/class1_neural_network.py
+++ b/mhcflurry/class1_neural_network.py
@@ -249,6 +249,7 @@ class Class1NeuralNetwork(object):
         result = dict(self.__dict__)
         result['_network'] = None
         result['network_weights'] = None
+        result['network_weights_loader'] = None
         return result
 
     @classmethod
@@ -277,6 +278,12 @@ class Class1NeuralNetwork(object):
         return instance
 
     def load_weights(self):
+        """
+        Load weights by evaluating self.network_weights_loader, if needed.
+
+        After calling this, self.network_weights_loader will be None and
+        self.network_weights will be the weights list, if available.
+        """
         if self.network_weights_loader:
             self.network_weights = self.network_weights_loader()
             self.network_weights_loader = None
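
Taken together, __getstate__ and load_weights implement a lazy-loading
pattern: the pickled form carries neither the weights nor the loader (which
are presumably restored through a separate mechanism), and an in-memory
instance defers materializing weights until first use. A sketch with an
illustrative class, not the real Class1NeuralNetwork::

    class LazyWeightsExample(object):
        """Illustrative class mirroring the two attributes used above."""

        def __init__(self, weights_loader):
            self.network_weights = None
            self.network_weights_loader = weights_loader  # zero-arg callable

        def load_weights(self):
            # Resolve the loader on first use, then drop it.
            if self.network_weights_loader:
                self.network_weights = self.network_weights_loader()
                self.network_weights_loader = None

        def __getstate__(self):
            # Exclude the loader from pickling (it may close over open file
            # handles) along with the weights themselves.
            result = dict(self.__dict__)
            result['network_weights'] = None
            result['network_weights_loader'] = None
            return result

    instance = LazyWeightsExample(lambda: [[0.25, 0.75]])
    instance.load_weights()
    print(instance.network_weights)  # [[0.25, 0.75]]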