diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh
index f0a3d54aece76755ab92f1a6764243f014f112b4..1f2b7067488963912dea57932ff60f36fa55ac42 100755
--- a/downloads-generation/data_curated/GENERATE.sh
+++ b/downloads-generation/data_curated/GENERATE.sh
@@ -34,10 +34,23 @@ time python curate.py \
     --data-iedb \
         "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
     --data-kim2014 \
-        "$(mhcflurry-downloads path data_kim2014)/bdata.20130222.mhci.public.1.txt" \
-    --out-csv curated_training_data.csv
+        "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
+    --out-csv curated_training_data.no_mass_spec.csv
+
+time python curate.py \
+    --data-iedb \
+        "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
+    --data-kim2014 \
+        "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
+    --data-systemhc-atlas \
+        "$(mhcflurry-downloads path data_systemhcatlas)/data.csv.bz2" \
+    --data-abelin-mass-spec \
+        "$(mhcflurry-downloads path data_published)/abelin2017.hits.csv.bz2" \
+    --out-csv curated_training_data.with_mass_spec.csv
+
+bzip2 curated_training_data.no_mass_spec.csv
+bzip2 curated_training_data.with_mass_spec.csv
 
-bzip2 curated_training_data.csv
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py
index 2650a5bb67c4d19d11d506318c2afe1115f7fc27..fbb503a471cf91b36b58e089435794deee1946a2 100755
--- a/downloads-generation/data_curated/curate.py
+++ b/downloads-generation/data_curated/curate.py
@@ -29,14 +29,30 @@ parser.add_argument(
     action="append",
     default=[],
     help="Path to IEDB-style affinity data (e.g. mhc_ligand_full.csv)")
+parser.add_argument(
+    "--data-systemhc-atlas",
+    action="append",
+    default=[],
+    help="Path to systemhc-atlas-style mass-spec data")
+parser.add_argument(
+    "--data-abelin-mass-spec",
+    action="append",
+    default=[],
+    help="Path to Abelin Immunity 2017 mass-spec hits")
+parser.add_argument(
+    "--include-mass-spec",
+    action="store_true",
+    default=False,
+    help="Include mass-spec observations in IEDB")
+
 parser.add_argument(
     "--out-csv",
     required=True,
     help="Result file")
 
 QUALITATIVE_TO_AFFINITY_AND_INEQUALITY = {
-    "Negative": (20000.0, ">"),
-    "Positive": (500.0, "<"),
+    "Negative": (5000.0, ">"),
+    "Positive": (500.0, "<"),  # used for mass-spec hits
     "Positive-High": (100.0, "<"),
     "Positive-Intermediate": (1000.0, "<"),
     "Positive-Low": (5000.0, "<"),
@@ -76,7 +92,58 @@ def load_data_kim2014(filename):
     return df
 
 
-def load_data_iedb(iedb_csv, include_qualitative=True):
+def load_data_systemhc_atlas(filename, min_probability=0.99):
+    df = pandas.read_csv(filename)
+    print("Loaded systemhc atlas data: %s" % str(df.shape))
+
+    df["measurement_source"] = "systemhc-atlas"
+    df["measurement_value"] = QUALITATIVE_TO_AFFINITY["Positive"]
+    df["measurement_inequality"] = "<"
+    df["measurement_type"] = "qualitative"
+    df["original_allele"] = df.top_allele
+    df["peptide"] = df.search_hit
+    df["allele"] = df.top_allele.map(normalize_allele_name)
+
+    print("Dropping un-parseable alleles: %s" % ", ".join(
+        str(x) for x in df.ix[df.allele == "UNKNOWN"]["top_allele"].unique()))
+    df = df.loc[df.allele != "UNKNOWN"]
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    print("Dropping data points with probability < %f" % min_probability)
+    df = df.loc[df.prob >= min_probability]
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    print("Removing duplicates")
+    df = df.drop_duplicates(["allele", "peptide"])
+    print("Systemhc atlas data now: %s" % str(df.shape))
+
+    return df
+
+
+def load_data_abelin_mass_spec(filename):
+    df = pandas.read_csv(filename)
+    print("Loaded Abelin mass-spec data: %s" % str(df.shape))
+
+    df["measurement_source"] = "abelin-mass-spec"
+    df["measurement_value"] = QUALITATIVE_TO_AFFINITY["Positive"]
+    df["measurement_inequality"] = "<"
+    df["measurement_type"] = "qualitative"
+    df["original_allele"] = df.allele
+    df["allele"] = df.original_allele.map(normalize_allele_name)
+
+    print("Dropping un-parseable alleles: %s" % ", ".join(
+        str(x) for x in df.ix[df.allele == "UNKNOWN"]["allele"].unique()))
+    df = df.loc[df.allele != "UNKNOWN"]
+    print("Abelin mass-spec data now: %s" % str(df.shape))
+
+    print("Removing duplicates")
+    df = df.drop_duplicates(["allele", "peptide"])
+    print("Abelin mass-spec data now: %s" % str(df.shape))
+
+    return df
+
+
+def load_data_iedb(iedb_csv, include_qualitative=True, include_mass_spec=False):
     iedb_df = pandas.read_csv(iedb_csv, skiprows=1, low_memory=False)
     print("Loaded iedb data: %s" % str(iedb_df.shape))
 
@@ -110,9 +177,10 @@ def load_data_iedb(iedb_csv, include_qualitative=True):
     qualitative = iedb_df.ix[iedb_df["Units"] != "nM"].copy()
     qualitative["measurement_type"] = "qualitative"
     print("Qualitative measurements: %d" % len(qualitative))
-    #qualitative = qualitative.ix[
-    #    (~qualitative["Method/Technique"].str.contains("mass spec"))
-    #].copy()
+    if not include_mass_spec:
+        qualitative = qualitative.ix[
+            (~qualitative["Method/Technique"].str.contains("mass spec"))
+        ].copy()
 
     qualitative["Quantitative measurement"] = (
         qualitative["Qualitative Measure"].map(QUALITATIVE_TO_AFFINITY))
@@ -169,7 +237,7 @@ def run():
 
     dfs = []
     for filename in args.data_iedb:
-        df = load_data_iedb(filename)
+        df = load_data_iedb(filename, include_mass_spec=args.include_mass_spec)
         dfs.append(df)
     for filename in args.data_kim2014:
         df = load_data_kim2014(filename)
@@ -185,8 +253,20 @@ def run():
             ]
             print("Kim2014 data now: %s" % str(df.shape))
         dfs.append(df)
+    for filename in args.data_systemhc_atlas:
+        df = load_data_systemhc_atlas(filename)
+        dfs.append(df)
+    for filename in args.data_abelin_mass_spec:
+        df = load_data_abelin_mass_spec(filename)
+        dfs.append(df)
 
     df = pandas.concat(dfs, ignore_index=True)
+    print("Combined df: %s" % (str(df.shape)))
+
+    print("Removing combined duplicates")
+    df = df.drop_duplicates(["allele", "peptide", "measurement_value"])
+    print("New combined df: %s" % (str(df.shape)))
+
     df = df[[
         "allele",
         "peptide",
@@ -197,7 +277,7 @@ def run():
         "original_allele",
     ]].sort_values(["allele", "peptide"]).dropna()
 
-    print("Combined df: %s" % (str(df.shape)))
+    print("Final combined df: %s" % (str(df.shape)))
 
     df.to_csv(args.out_csv, index=False)
     print("Wrote: %s" % args.out_csv)
diff --git a/downloads-generation/data_kim2014/README.md b/downloads-generation/data_kim2014/README.md
deleted file mode 100644
index bf42e01ccade69b63172d342c92a41d9e0497dcb..0000000000000000000000000000000000000000
--- a/downloads-generation/data_kim2014/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Kim 2014 Data
-
-This download contains the BD2009, BD2013, and BLIND datasets from [Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241). BD2013 (augmented with more recent data from IEDB) are used to train the production MHCflurry models. BD2009 and BLIND are useful for performing validation on held-out data.
-
-These files are available on dropbox here:
-
- * https://dl.dropboxusercontent.com/u/3967524/bdata.2009.mhci.public.1.txt
- * https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
- * https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
-
-To generate this download run:
-
-```
-./GENERATE.sh
-```
\ No newline at end of file
diff --git a/downloads-generation/data_published/GENERATE.sh b/downloads-generation/data_published/GENERATE.sh
new file mode 100755
index 0000000000000000000000000000000000000000..916a901e160340f03f2039a640c44bf2460bff55
--- /dev/null
+++ b/downloads-generation/data_published/GENERATE.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Download some published MHC I ligand data
+#
+#
+set -e
+set -x
+
+DOWNLOAD_NAME=data_published
+SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
+SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+
+mkdir -p "$SCRATCH_DIR"
+rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+# Send stdout and stderr to a logfile included with the archive.
+exec >  >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+
+# Log some environment info
+date
+pip freeze
+# git rev-parse HEAD
+git status
+
+cd $SCRATCH_DIR/$DOWNLOAD_NAME
+
+# Download kim2014 data
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.2009.mhci.public.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.20130222.mhci.public.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.2013.mhci.public.blind.1.txt
+
+# Download abelin et al 2017 data
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/abelin2017.hits.csv.bz2
+
+cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
+tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
+
+echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
diff --git a/downloads-generation/data_published/README.md b/downloads-generation/data_published/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..807adbef7bc6a293c4bd71f5fe1af13f33c47e5d
--- /dev/null
+++ b/downloads-generation/data_published/README.md
@@ -0,0 +1,24 @@
+# Published datasets
+
+These datasets are derived from publications and do not change.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
+
+## Kim 2014
+
+This download contains the BD2009, BD2013, and BLIND datasets from
+[Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241).
+
+BD2013 (augmented with more recent data from IEDB) is used to train the production
+MHCflurry models. BD2009 and BLIND are useful for validation on held-out data.
+
+
+## Abelin et al. Immunity 2017
+
+This download contains the peptides identified in
+[Mass Spectrometry Profiling of HLA-Associated Peptidomes in Mono-allelic Cells Enables More Accurate Epitope Prediction](https://www.ncbi.nlm.nih.gov/pubmed/28228285).
+
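+## Fetching the generated download
+
+A minimal sketch of retrieving the result with the `mhcflurry-downloads` tool,
+assuming the archive has already been uploaded to the `data_published` URL
+listed in `mhcflurry/downloads.yml`:
+
+```
+mhcflurry-downloads fetch data_published
+mhcflurry-downloads path data_published
+```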
diff --git a/downloads-generation/data_kim2014/GENERATE.sh b/downloads-generation/data_systemhcatlas/GENERATE.sh
similarity index 66%
rename from downloads-generation/data_kim2014/GENERATE.sh
rename to downloads-generation/data_systemhcatlas/GENERATE.sh
index dbda0fe8c12df076e5e8f3b96728eefda51f23c6..1558409c2b981591681ae726a641e51283935ad9 100755
--- a/downloads-generation/data_kim2014/GENERATE.sh
+++ b/downloads-generation/data_systemhcatlas/GENERATE.sh
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Download some published MHC I ligand data from a location on Dropbox.
+# Download the SysteMHC Atlas data dump (MHC I ligands identified by mass spec)
 #
 #
 set -e
 set -x
 
-DOWNLOAD_NAME=data_kim2014
+DOWNLOAD_NAME=data_systemhcatlas
 SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
 SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
 
@@ -26,9 +26,9 @@ git status
 
 cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.2009.mhci.public.1.txt
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
-wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
+wget --quiet https://github.com/openvax/mhcflurry/releases/download/pre-1.1/systemhc.20171121.combined.csv.bz2
+
+mv systemhc.20171121.combined.csv.bz2 data.csv.bz2
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
diff --git a/downloads-generation/data_systemhcatlas/README.md b/downloads-generation/data_systemhcatlas/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e66d9af8c400d8cceeaa4762bbb43bbea0493f8
--- /dev/null
+++ b/downloads-generation/data_systemhcatlas/README.md
@@ -0,0 +1,10 @@
+# SysteMHC database dump
+
+This is a data dump of the [SysteMHC Atlas](https://systemhcatlas.org/) provided
+by personal communication. It is distributed under the ODC Open Database License.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
\ No newline at end of file
diff --git a/downloads-generation/models_class1/GENERATE.sh b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
similarity index 97%
rename from downloads-generation/models_class1/GENERATE.sh
rename to downloads-generation/models_class1_no_mass_spec/GENERATE.sh
index b72334b536cca453300b52257eb8e9c65c7e0dd3..3b80f704deb4975a543d030a437e92bb3afd0265 100755
--- a/downloads-generation/models_class1/GENERATE.sh
+++ b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
@@ -32,7 +32,7 @@ mkdir models
 cp $SCRIPT_DIR/hyperparameters.yaml .
 
 time mhcflurry-class1-train-allele-specific-models \
-    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" \
+    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models \
     --percent-rank-calibration-num-peptides-per-length 1000000 \
diff --git a/downloads-generation/models_class1/README.md b/downloads-generation/models_class1_no_mass_spec/README.md
similarity index 100%
rename from downloads-generation/models_class1/README.md
rename to downloads-generation/models_class1_no_mass_spec/README.md
diff --git a/downloads-generation/models_class1/hyperparameters.test.json b/downloads-generation/models_class1_no_mass_spec/hyperparameters.test.json
similarity index 100%
rename from downloads-generation/models_class1/hyperparameters.test.json
rename to downloads-generation/models_class1_no_mass_spec/hyperparameters.test.json
diff --git a/downloads-generation/models_class1/hyperparameters.yaml b/downloads-generation/models_class1_no_mass_spec/hyperparameters.yaml
similarity index 100%
rename from downloads-generation/models_class1/hyperparameters.yaml
rename to downloads-generation/models_class1_no_mass_spec/hyperparameters.yaml
diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml
index 70fc552e4178aa6aaf45830be13ebea69e6b2386..5b0be59fcb2c3dce4e224df58155ffc52f5bda97 100644
--- a/mhcflurry/downloads.yml
+++ b/mhcflurry/downloads.yml
@@ -20,7 +20,7 @@ releases:
     1.1.0:
         compatibility-version: 2
         downloads:
-            - name: models_class1
+            - name: models_class1_no_mass_spec
               url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.1/models_class1.20180116.tar.bz2
               default: true
 
@@ -36,12 +36,16 @@ releases:
               url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/data_iedb.tar.bz2
               default: false
 
-            - name: data_kim2014
-              url: http://github.com/hammerlab/mhcflurry/releases/download/0.9.1/data_kim2014.tar.bz2
+            - name: data_published
+              url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.1/data_published.tar.bz2
+              default: false
+
+            - name: data_systemhcatlas
+              url: http://github.com/hammerlab/mhcflurry/releases/download/pre-1.1/data_systemhcatlas.tar.bz2
               default: false
 
             - name: data_curated
-              url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0/data_curated.tar.bz2
+              url: https://github.com/hammerlab/mhcflurry/releases/download/pre-1.1/data_curated.tar.bz2
               default: true
 
     1.0.0:
diff --git a/setup.py b/setup.py
index 0a2f0e49f105d8b18278eb7031ec1e8d163c32ba..a66f0d8255cb82c5f978d1d019004797dd1f11b9 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@ try:
     import pypandoc
     readme = pypandoc.convert(readme, to='rst', format='md')
 except:
-    logging.warn("Conversion of long_description from MD to RST failed")
+    logging.warning("Conversion of long_description from MD to RST failed")
     pass
 
 with open('mhcflurry/__init__.py', 'r') as f: