Skip to content
Snippets Groups Projects
Commit 48d6eed2 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

fixes

parent ab2ca5d0
No related branches found
No related tags found
No related merge requests found
......@@ -29,23 +29,33 @@ git status
cd $SCRATCH_DIR/$DOWNLOAD_NAME
cp $SCRIPT_DIR/curate.py .
cp $SCRIPT_DIR/curate_by_pmid.py .
cp $SCRIPT_DIR/curate_ms_by_pmid.py .
RAW_DIR="$(mhcflurry-downloads path data_published)/raw"
cp -r "$RAW_DIR" .
MS_DIR="$(mhcflurry-downloads path data_published)/ms"
cp -r "$MS_DIR" .
EXPRESSION_DIR="$(mhcflurry-downloads path data_published)/expression"
cp -r "$EXPRESSION_DIR" .
CURATE_BY_PMID_ARGS=""
for pmid in $(ls raw)
for pmid in $(ls ms)
do
CURATE_BY_PMID_ARGS+=$(echo --ms-item $pmid ms/$pmid/* ' ')
done
for item in $(ls expression)
do
CURATE_BY_PMID_ARGS+=$(echo --item $pmid raw/$pmid/* ' ')
CURATE_BY_PMID_ARGS+=$(echo --expression-item $item expression/$item/* ' ')
done
time python curate_by_pmid.py $CURATE_BY_PMID_ARGS \
--out nontraining_curated.by_pmid.csv
time python curate_ms_by_pmid.py $CURATE_BY_PMID_ARGS \
--ms-out ms.nontraining_curated.by_pmid.csv \
--expression-out rna_expression.csv
bzip2 ms.nontraining_curated.by_pmid.csv
bzip2 rna_expression.csv
bzip2 nontraining_curated.by_pmid.csv
rm -rf raw
rm -rf ms
# No mass-spec data
time python curate.py \
......
......@@ -33,7 +33,7 @@ wget -q https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.200
wget -q https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.20130222.mhci.public.1.txt
wget -q https://github.com/openvax/mhcflurry/releases/download/pre-1.1/bdata.2013.mhci.public.blind.1.txt
mkdir raw
mkdir ms
############################################
# MS: Multiallelic class I
......@@ -43,10 +43,10 @@ mkdir raw
# Pearson, ..., Perreault JCI 2016 [PMID 27841757]
# but was reanalyzed in this work, and we download the reanalyzed version here.
PMID=28832583
mkdir -p raw/$PMID
wget -q https://doi.org/10.1371/journal.pcbi.1005725.s002 -P raw/$PMID # data generated in this work
wget -q https://doi.org/10.1371/journal.pcbi.1005725.s003 -P raw/$PMID # data reanalyzed in this work
cd raw/$PMID
mkdir -p ms/$PMID
wget -q https://doi.org/10.1371/journal.pcbi.1005725.s002 -P ms/$PMID # data generated in this work
wget -q https://doi.org/10.1371/journal.pcbi.1005725.s003 -P ms/$PMID # data reanalyzed in this work
cd ms/$PMID
unzip *.s002
unzip *.s003
mkdir saved
......@@ -58,33 +58,33 @@ cd ../..
# Bassani-Sternberg, ..., Mann Mol Cell Proteomics 2015 [PMID 25576301]
PMID=25576301
mkdir -p raw/$PMID
wget -q https://www.mcponline.org/highwire/filestream/35026/field_highwire_adjunct_files/7/mcp.M114.042812-4.xlsx -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://www.mcponline.org/highwire/filestream/35026/field_highwire_adjunct_files/7/mcp.M114.042812-4.xlsx -P ms/$PMID
# Mommen, ..., Heck PNAS 2014 [PMID 24616531]
PMID=24616531
mkdir -p raw/$PMID
wget -q https://www.pnas.org/highwire/filestream/615485/field_highwire_adjunct_files/1/sd01.xlsx -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://www.pnas.org/highwire/filestream/615485/field_highwire_adjunct_files/1/sd01.xlsx -P ms/$PMID
# Gloger, ..., Neri Cancer Immunol Immunother 2016 [PMID 27600516]
# Data extracted from supplemental PDF table.
PMID=27600516
mkdir -p raw/$PMID
wget -q https://github.com/openvax/mhcflurry/releases/download/pan-dev1/27600516.peptides.csv -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://github.com/openvax/mhcflurry/releases/download/pan-dev1/27600516.peptides.csv -P ms/$PMID
# Ritz, ..., Fugmann Proteomics 2016 [PMID 26992070]
# Supplemental zip downloaded from publication
PMID=26992070
mkdir -p raw/$PMID
wget -q https://github.com/openvax/mhcflurry/releases/download/pan-dev1/pmic12297-sup-0001-supinfo.zip -P raw/$PMID
cd raw/$PMID
mkdir -p ms/$PMID
wget -q https://github.com/openvax/mhcflurry/releases/download/pan-dev1/pmic12297-sup-0001-supinfo.zip -P ms/$PMID
cd ms/$PMID
unzip pmic12297-sup-0001-supinfo.zip
cd ../..
# Shraibman, ..., Admon Mol Cell Proteomics 2016 [PMID 27412690]
PMID=27412690
mkdir -p raw/$PMID
wget -q https://www.mcponline.org/lookup/suppl/doi:10.1074/mcp.M116.060350/-/DC1/mcp.M116.060350-2.xlsx -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://www.mcponline.org/lookup/suppl/doi:10.1074/mcp.M116.060350/-/DC1/mcp.M116.060350-2.xlsx -P ms/$PMID
# Pearson, ..., Perreault J Clin Invest 2016 [PMID 27841757]
# Note: we do not use the original data from this publication; we use PMID 28832583's reanalysis of it.
......@@ -92,17 +92,59 @@ wget -q https://www.mcponline.org/lookup/suppl/doi:10.1074/mcp.M116.060350/-/DC1
# Hassan, ..., van Veelen Mol Cell Proteomics 2015 [PMID 23481700]
PMID=23481700
mkdir -p raw/$PMID
wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://www.mcponline.org/highwire/filestream/34681/field_highwire_adjunct_files/1/mcp.M112.024810-2.xls -P ms/$PMID
# Shraibman, ..., Admon Mol Cell Proteomics 2019 [PMID 31154438]
PMID=31154438
mkdir -p ms/$PMID
wget -q https://www.mcponline.org/highwire/filestream/51948/field_highwire_adjunct_files/3/zjw006195963st2.txt -P ms/$PMID
wget -q https://www.mcponline.org/highwire/filestream/51948/field_highwire_adjunct_files/1/zjw006195963st1.xlsx -P ms/$PMID
# Bassani-Sternberg, ..., Krackhardt Nature Comm. 2016 [PMID 27869121]
PMID=27869121
mkdir -p ms/$PMID
wget -q "https://static-content.springer.com/esm/art%3A10.1038%2Fncomms13404/MediaObjects/41467_2016_BFncomms13404_MOESM1318_ESM.xlsx" -P ms/$PMID
############################################
# MS: Monoallelic class II
############################################
# Abelin, ..., Rooney Immunity 2019 [PMID 31495665]
PMID=31495665
mkdir -p raw/$PMID
wget -q https://ars.els-cdn.com/content/image/1-s2.0-S1074761319303632-mmc2.xlsx -P raw/$PMID
mkdir -p ms/$PMID
wget -q https://ars.els-cdn.com/content/image/1-s2.0-S1074761319303632-mmc2.xlsx -P ms/$PMID
############################################
# RNA-seq expression data (TPMs)
############################################
# CCLE as processed by expression atlas
DATASET=expression-atlas-22460905
mkdir -p expression/$DATASET
wget -q https://www.ebi.ac.uk/gxa/experiments-content/E-MTAB-2770/resources/ExperimentDownloadSupplier.RnaSeqBaseline/tpms.tsv -P expression/$DATASET
# Human protein atlas
DATASET=human-protein-atlas
mkdir -p expression/$DATASET
cd expression/$DATASET
wget -q https://www.proteinatlas.org/download/rna_celline.tsv.zip
wget -q https://www.proteinatlas.org/download/rna_blood_cell_sample_tpm_m.tsv.zip
wget -q https://www.proteinatlas.org/download/rna_tissue_gtex.tsv.zip
for i in $(ls *.zip)
do
unzip $i
rm $i
done
cd ../..
# Melanoma. Original publication
# Barry, ..., Krummel Nature Medicine 2018 [PMID 29942093].
DATASET=GSE113126
mkdir -p expression/$DATASET
cd expression/$DATASET
wget -q "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE113126&format=file" -O GSE113126_RAW.tar
tar -xvf GSE113126_RAW.tar
rm GSE113126_RAW.tar
cd ../..
cp $SCRIPT_ABSOLUTE_PATH .
bzip2 LOG.txt
......
......@@ -62,15 +62,15 @@ if [ "$2" != "continue-incomplete" ]
then
cp $SCRIPT_DIR/generate_hyperparameters.production.py .
cp $SCRIPT_DIR/generate_hyperparameters.py .
python generate_hyperparameters.production.py > hyperparameters.production.json
python generate_hyperparameters.py hyperparameters.production.json no_pretrain > hyperparameters.no_pretrain.yaml
python generate_hyperparameters.production.py > hyperparameters.production.yaml
python generate_hyperparameters.py hyperparameters.production.yaml no_pretrain > hyperparameters.no_pretrain.yaml
python generate_hyperparameters.py hyperparameters.no_pretrain.yaml single_hidden > hyperparameters.single_hidden_no_pretrain.yaml
fi
for kind in single_hidden_no_pretrain no_pretrain 34mer_sequence
do
CONTINUE_INCOMPLETE_ARGS=""
if [ "$2" == "continue-incomplete" ] && [ -d "models.${kind}" ]
if [ "$2" == "continue-incomplete" ] && [ -d "models.unselected.${kind}" ]
then
echo "Will continue existing run: $kind"
CONTINUE_INCOMPLETE_ARGS="--continue-incomplete"
......
......@@ -37,10 +37,6 @@ releases:
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_mass_spec_annotated.20190930.tar.bz2
default: false
- name: data_expression
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_expression.20191009.tar.bz2
default: false
- name: data_references
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_references.20190927.tar.bz2
default: false
......@@ -62,7 +58,7 @@ releases:
default: false
- name: data_published
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_published.20190924.tar.bz2
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_published.20191011.tar.bz2
default: false
- name: data_curated
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment