#!/bin/bash
#
# Generate the "data_mass_spec_benchmark" download archive.
#
# Everything below runs inside a freshly created scratch directory:
#   1. Copy the helper Python scripts that live next to this script.
#   2. Write the allele list and the proteome peptide tables
#      (chromosome 1 only first, then all peptides).
#   3. Run run_thirdparty_predictors.py (--predictor netmhcpan4) on both
#      peptide tables, submitting jobs with `bsub`.
#   4. Compress the peptide tables and the log, then tar up the directory.
#
# Environment:
#   TMPDIR  Optional. Parent of the scratch directory; /tmp when unset.
#
set -e
set -x

DOWNLOAD_NAME=data_mass_spec_benchmark
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")

export PYTHONUNBUFFERED=1

# Start from an empty working directory. The :? guards abort the script
# instead of letting rm -rf run against an unintended path if either
# variable is ever empty.
mkdir -p "$SCRATCH_DIR"
rm -rf "${SCRATCH_DIR:?}/${DOWNLOAD_NAME:?}"
mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"

# Send stdout and stderr to a logfile included with the archive.
exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)

# Log some environment info
date
pip freeze
git status

cd "$SCRATCH_DIR/$DOWNLOAD_NAME"

cp "$SCRIPT_DIR/write_proteome_peptides.py" .
cp "$SCRIPT_DIR/run_mhcflurry.py" .
cp "$SCRIPT_DIR/run_thirdparty_predictors.py" .
cp "$SCRIPT_DIR/write_allele_list.py" .

PEPTIDES="$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2"
REFERENCES_DIR="$(mhcflurry-downloads path data_references)"

python write_allele_list.py "$PEPTIDES" --out alleles.txt

mkdir predictions

# First just chr1 peptides
python write_proteome_peptides.py \
    "$PEPTIDES" \
    "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
    --chromosome 1 \
    --out proteome_peptides.chr1.csv

#for kind in with_mass_spec no_mass_spec
#do
#    python run_mhcflurry.py \
#        proteome_peptides.chr1.csv \
#        --chunk-size 100000 \
#        --batch-size 65536 \
#        --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
#        --allele $(cat alleles.txt) \
#        --out "predictions/chr1.mhcflurry.$kind" \
#        --verbosity 1 \
#        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
#        --cluster-parallelism \
#        --cluster-max-retries 15 \
#        --cluster-submit-command bsub \
#        --cluster-results-workdir ~/mhcflurry-scratch \
#        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
#done

# $(cat alleles.txt) is intentionally unquoted: each whitespace-separated
# allele must reach --allele as its own argument.
# shellcheck disable=SC2046
python run_thirdparty_predictors.py \
    proteome_peptides.chr1.csv \
    --predictor netmhcpan4 \
    --chunk-size 100000 \
    --allele $(cat alleles.txt) \
    --out "predictions/chr1.netmhcpan4" \
    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
    --cluster-parallelism \
    --cluster-max-retries 15 \
    --cluster-submit-command bsub \
    --cluster-results-workdir ~/mhcflurry-scratch \
    --cluster-script-prefix-path "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf"

# Now all peptides
python write_proteome_peptides.py \
    "$PEPTIDES" \
    "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
    --out proteome_peptides.all.csv

#for kind in with_mass_spec no_mass_spec
#do
#    python run_mhcflurry.py \
#        proteome_peptides.all.csv \
#        --chunk-size 500000 \
#        --batch-size 65536 \
#        --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
#        --allele $(cat alleles.txt) \
#        --out "predictions/all.mhcflurry.$kind" \
#        --verbosity 1 \
#        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
#        --cluster-parallelism \
#        --cluster-max-retries 15 \
#        --cluster-submit-command bsub \
#        --cluster-results-workdir ~/mhcflurry-scratch \
#        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
#done

# The submit-script header is referenced through $SCRIPT_DIR: the working
# directory is now the scratch directory, which only holds the files copied
# or generated above, so a bare filename would not resolve to the header
# that ships alongside this script.
# shellcheck disable=SC2046
python run_thirdparty_predictors.py \
    proteome_peptides.all.csv \
    --predictor netmhcpan4 \
    --chunk-size 10000 \
    --allele $(cat alleles.txt) \
    --out "predictions/all.netmhcpan4" \
    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
    --cluster-parallelism \
    --cluster-max-retries 15 \
    --cluster-submit-command bsub \
    --cluster-results-workdir ~/mhcflurry-scratch \
    --cluster-script-prefix-path "$SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf"

bzip2 proteome_peptides.chr1.csv
bzip2 proteome_peptides.all.csv

# Keep a copy of this script in the archive, next to the outputs it produced.
cp "$SCRIPT_ABSOLUTE_PATH" .
bzip2 LOG.txt
RESULT="$SCRATCH_DIR/${DOWNLOAD_NAME}.$(date +%Y%m%d).tar.bz2"
tar -cjf "$RESULT" *

echo "Created archive: $RESULT"