From d3b84cbc69364b7e4bba21d3f542e1b915b5e009 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Fri, 20 Sep 2019 15:07:48 -0400 Subject: [PATCH] update datasets --- downloads-generation/data_published/GENERATE.sh | 13 ++++++++++--- downloads-generation/data_published/README.md | 8 +------- mhcflurry/downloads.yml | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/downloads-generation/data_published/GENERATE.sh b/downloads-generation/data_published/GENERATE.sh index ea3d7239..33cc84c5 100755 --- a/downloads-generation/data_published/GENERATE.sh +++ b/downloads-generation/data_published/GENERATE.sh @@ -25,7 +25,7 @@ date cd $SCRATCH_DIR/$DOWNLOAD_NAME ############################################ -# BINDING AFFINITIES +# BINDING AFFINITIES: class I ############################################ # # Kim et al 2014 [PMID 25017736] @@ -36,7 +36,7 @@ wget -q https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.201 mkdir raw ############################################ -# MS: Multiallelic +# MS: Multiallelic class I ############################################ # Bassani-Sternberg, ..., Gfeller PLOS Comp. Bio. 2017 [PMID 28832583] # The first dataset is from this work. The second dataset is originally from: @@ -84,8 +84,15 @@ wget -q https://www.mcponline.org/lookup/suppl/doi:10.1074/mcp.M116.060350/-/DC1 # Hassan, ..., van Veelen Mol Cell Proteomics 2015 [PMID 23481700] PMID=23481700 mkdir -p raw/$PMID -wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls -P raw/$PMID +wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls -P raw/$PMID +############################################ +# MS: Monoallelic class II +############################################ +# Abelin, ..., Rooney Immunity 2019 [PMID 31495665] +PMID=31495665 +mkdir -p raw/$PMID +wget -q https://ars.els-cdn.com/content/image/1-s2.0-S1074761319303632-mmc2.xlsx -P raw/$PMID cp $SCRIPT_ABSOLUTE_PATH . diff --git a/downloads-generation/data_published/README.md b/downloads-generation/data_published/README.md index 807adbef..1e70529d 100644 --- a/downloads-generation/data_published/README.md +++ b/downloads-generation/data_published/README.md @@ -8,17 +8,11 @@ To generate this download run: ./GENERATE.sh ``` -## Kim 2014 - This download contains the BD2009, BD2013, and BLIND datasets from [Dataset size and composition impact the reliability of performance benchmarks for peptide-MHC binding predictions](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241). BD2013 (augmented with more recent data from IEDB) are used to train the production MHCflurry models. BD2009 and BLIND are useful for performing validation on held-out data. - -## Abelin et al. Immunity 2017 - -This download contains the peptides identified in -[Mass Spectrometry Profiling of HLA-Associated Peptidomes in Mono-allelic Cells Enables More Accurate Epitope Prediction](https://www.ncbi.nlm.nih.gov/pubmed/28228285). +The other published data sets correspond to the publications indicated in GENERATE.sh. diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml index 21af8784..3c731928 100644 --- a/mhcflurry/downloads.yml +++ b/mhcflurry/downloads.yml @@ -109,7 +109,7 @@ releases: default: false - name: data_published - url: http://github.com/openvax/mhcflurry/releases/download/pan-dev1/data_published.tar.bz2 + url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_published.20190920.tar.bz2 default: false - name: data_curated -- GitLab