Newer
Older
#!/bin/bash
#
#
set -e
set -x
DOWNLOAD_NAME=data_mass_spec_benchmark
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
export PYTHONUNBUFFERED=1
mkdir -p "$SCRATCH_DIR"
rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
# Send stdout and stderr to a logfile included with the archive.
exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
# Log some environment info
date
pip freeze
git status
cd $SCRATCH_DIR/$DOWNLOAD_NAME
cp $SCRIPT_DIR/write_proteome_peptides.py .
cp $SCRIPT_DIR/run_mhcflurry.py .
cp $SCRIPT_DIR/write_allele_list.py .
PEPTIDES=$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2
REFERENCES_DIR=$(mhcflurry-downloads path data_references)
python write_proteome_peptides.py \
"$PEPTIDES" \
"${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
python write_allele_list.py "$PEPTIDES" --out alleles.txt
mkdir predictions
for kind in with_mass_spec no_mass_spec
do
python run_mhcflurry.py \
--models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
--allele $(cat alleles.txt) \
--verbosity 1 \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--cluster-parallelism \
--cluster-max-retries 15 \
--cluster-submit-command bsub \
--cluster-results-workdir ~/mhcflurry-scratch \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
done