From d3b84cbc69364b7e4bba21d3f542e1b915b5e009 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Fri, 20 Sep 2019 15:07:48 -0400
Subject: [PATCH] update datasets

---
 downloads-generation/data_published/GENERATE.sh | 13 ++++++++++---
 downloads-generation/data_published/README.md   |  8 +-------
 mhcflurry/downloads.yml                         |  2 +-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/downloads-generation/data_published/GENERATE.sh b/downloads-generation/data_published/GENERATE.sh
index ea3d7239..33cc84c5 100755
--- a/downloads-generation/data_published/GENERATE.sh
+++ b/downloads-generation/data_published/GENERATE.sh
@@ -25,7 +25,7 @@ date
 cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
 ############################################
-# BINDING AFFINITIES
+# BINDING AFFINITIES: class I
 ############################################
 #
 # Kim et al 2014 [PMID 25017736]
@@ -36,7 +36,7 @@ wget -q https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.201
 mkdir raw
 
 ############################################
-# MS: Multiallelic
+# MS: Multiallelic class I
 ############################################
 # Bassani-Sternberg, ..., Gfeller PLOS Comp. Bio. 2017 [PMID 28832583]
 # The first dataset is from this work. The second dataset is originally from:
@@ -84,8 +84,15 @@ wget -q https://www.mcponline.org/lookup/suppl/doi:10.1074/mcp.M116.060350/-/DC1
 # Hassan, ..., van Veelen Mol Cell Proteomics 2015 [PMID 23481700]
 PMID=23481700
 mkdir -p raw/$PMID
-wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls  -P raw/$PMID
+wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls -P raw/$PMID
 
+############################################
+# MS: Monoallelic class II
+############################################
+# Abelin, ..., Rooney Immunity 2019 [PMID 31495665]
+PMID=31495665
+mkdir -p raw/$PMID
+wget -q https://ars.els-cdn.com/content/image/1-s2.0-S1074761319303632-mmc2.xlsx -P raw/$PMID
 
 
 cp $SCRIPT_ABSOLUTE_PATH .
diff --git a/downloads-generation/data_published/README.md b/downloads-generation/data_published/README.md
index 807adbef..1e70529d 100644
--- a/downloads-generation/data_published/README.md
+++ b/downloads-generation/data_published/README.md
@@ -8,17 +8,11 @@ To generate this download run:
 ./GENERATE.sh
 ```
 
-## Kim 2014
-
 This download contains the BD2009, BD2013, and BLIND datasets from
 [Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241).
 
 BD2013 (augmented with more recent data from IEDB) are used to train the production
 MHCflurry models. BD2009 and BLIND are useful for performing validation on held-out data.
 
-
-## Abelin et al. Immunity 2017
-
-This download contains the peptides identified in
-[Mass Spectrometry Profiling of HLA-Associated Peptidomes in Mono-allelic Cells Enables More Accurate Epitope Prediction](https://www.ncbi.nlm.nih.gov/pubmed/28228285).
+The other published datasets correspond to the publications cited in the comments of `GENERATE.sh`.
 
diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml
index 21af8784..3c731928 100644
--- a/mhcflurry/downloads.yml
+++ b/mhcflurry/downloads.yml
@@ -109,7 +109,7 @@ releases:
               default: false
 
             - name: data_published
-              url: http://github.com/openvax/mhcflurry/releases/download/pan-dev1/data_published.tar.bz2
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_published.20190920.tar.bz2
               default: false
 
             - name: data_curated
-- 
GitLab