Newer
Older
#!/bin/bash
#
#
set -e
set -x
DOWNLOAD_NAME=data_mass_spec_benchmark
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
export PYTHONUNBUFFERED=1
mkdir -p "$SCRATCH_DIR"
rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
# Send stdout and stderr to a logfile included with the archive.
exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
# Log some environment info (captured in LOG.txt for reproducibility).
date
pip freeze
git status

cd "$SCRATCH_DIR/$DOWNLOAD_NAME"

# Copy the generation scripts into the archive so it documents itself.
cp "$SCRIPT_DIR/write_proteome_peptides.py" .
cp "$SCRIPT_DIR/run_mhcflurry.py" .
cp "$SCRIPT_DIR/write_allele_list.py" .

# Inputs: annotated mass-spec hits and the reference proteome, both
# resolved through previously-fetched mhcflurry downloads.
PEPTIDES=$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2
REFERENCES_DIR=$(mhcflurry-downloads path data_references)

# Alleles observed in the mass-spec data; consumed by the predictor runs.
python write_allele_list.py "$PEPTIDES" --out alleles.txt

mkdir predictions
# First just chr1 peptides.
# NOTE(review): the original command ended in a dangling "\" continuation
# (which swallowed the comment line that followed) and wrote no output
# file, even though proteome_peptides.chr1.csv is consumed by the next
# step. The --out argument is restored here. If write_proteome_peptides.py
# supports restricting to a chromosome, that flag also appears to be
# missing — TODO confirm against its argument parser.
python write_proteome_peptides.py \
    "$PEPTIDES" \
    "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
    --out proteome_peptides.chr1.csv

#for kind in with_mass_spec no_mass_spec
#do
#    python run_mhcflurry.py \
#        proteome_peptides.chr1.csv \
#        --chunk-size 100000 \
#        --batch-size 65536 \
#        --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
#        --allele $(cat alleles.txt) \
#        --out "predictions/chr1.mhcflurry.$kind" \
#        --verbosity 1 \
#        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
#        --cluster-parallelism \
#        --cluster-max-retries 15 \
#        --cluster-submit-command bsub \
#        --cluster-results-workdir ~/mhcflurry-scratch \
#        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
#done
# Run NetMHCpan 4 over the chr1 peptides, fanned out on the cluster.
# $(cat alleles.txt) is intentionally unquoted: each allele must become a
# separate argument to --allele.
python run_thirdparty_predictors.py \
    proteome_peptides.chr1.csv \
    --predictor netmhcpan4 \
    --chunk-size 100000 \
    --allele $(cat alleles.txt) \
    --out "predictions/chr1.netmhcpan4" \
    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
    --cluster-parallelism \
    --cluster-max-retries 15 \
    --cluster-submit-command bsub \
    --cluster-results-workdir ~/mhcflurry-scratch \
    --cluster-script-prefix-path "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf"
# Now all peptides (full proteome, not just chr1).
python write_proteome_peptides.py \
    "$PEPTIDES" \
    "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
    --out proteome_peptides.all.csv

#for kind in with_mass_spec no_mass_spec
#do
#    python run_mhcflurry.py \
#        proteome_peptides.all.csv \
#        --chunk-size 500000 \
#        --batch-size 65536 \
#        --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
#        --allele $(cat alleles.txt) \
#        --out "predictions/all.mhcflurry.$kind" \
#        --verbosity 1 \
#        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
#        --cluster-parallelism \
#        --cluster-max-retries 15 \
#        --cluster-submit-command bsub \
#        --cluster-results-workdir ~/mhcflurry-scratch \
#        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
#done
# Run NetMHCpan 4 over the full proteome peptide set on the cluster.
# $(cat alleles.txt) is intentionally unquoted: each allele must become a
# separate argument to --allele.
python run_thirdparty_predictors.py \
    proteome_peptides.all.csv \
    --predictor netmhcpan4 \
    --chunk-size 100000 \
    --allele $(cat alleles.txt) \
    --out "predictions/all.netmhcpan4" \
    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
    --cluster-parallelism \
    --cluster-max-retries 15 \
    --cluster-submit-command bsub \
    --cluster-results-workdir ~/mhcflurry-scratch \
    --cluster-script-prefix-path "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf"