Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#
# Builds the "data_combined_iedb_kim2014" download archive.
#
# Steps:
#   1. Create a fresh working directory under $SCRATCH_DIR.
#   2. Mirror stdout/stderr into LOG.txt inside that directory so the log
#      ships with the archive.
#   3. Download and unzip the IEDB MHC ligand export.
#   4. Run create-iedb-class1-dataset.py and create-combined-class1-dataset.py
#      (expected to live next to this script) to produce
#      combined_human_class1_dataset.csv.
#   5. Copy this script into the working directory and tar everything
#      (except dot-directories) into $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2.
#
# Requires on PATH: wget, unzip, pip, git, tar, mhcflurry-downloads.
# Note: `git rev-parse` / `git status` run in the caller's current directory.

set -e
set -x

DOWNLOAD_NAME=data_combined_iedb_kim2014
SCRATCH_DIR=/tmp/mhcflurry-downloads-generation

# Resolve this script's absolute path so it still works after we `cd` away.
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")

# Start from an empty working directory for this download.
mkdir -p "$SCRATCH_DIR"
rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"

# Send stdout and stderr to a logfile included with the archive.
exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)

# Log some environment info
date
pip freeze
git rev-parse HEAD
git status

cd "$SCRATCH_DIR/$DOWNLOAD_NAME"

# By starting with a dot, we won't include it in the tar archive
# (the `*` glob used for tar below does not match dot-entries).
mkdir .tmp
cd .tmp

wget --quiet http://www.iedb.org/doc/mhc_ligand_full.zip
unzip mhc_ligand_full.zip

# Quote the script paths: the checkout location may contain spaces.
"$SCRIPT_DIR/create-iedb-class1-dataset.py" \
  --input-csv mhc_ligand_full.csv \
  --output-pickle-filename iedb_human_class1_assay_datasets.pickle

# Resolve the data_kim2014 location in a standalone assignment so that a
# failure of mhcflurry-downloads aborts the script under `set -e` (a failing
# command substitution embedded in an argument would be silently ignored).
KIM2014_DIR=$(mhcflurry-downloads path data_kim2014)

"$SCRIPT_DIR/create-combined-class1-dataset.py" \
  --iedb-pickle-path iedb_human_class1_assay_datasets.pickle \
  --netmhcpan-csv-path "$KIM2014_DIR/bdata.20130222.mhci.public.1.txt" \
  --output-csv-filename ../combined_human_class1_dataset.csv

cd ..

# Ship a copy of this generation script alongside the data and the log.
cp "$SCRIPT_ABSOLUTE_PATH" .

tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"