Skip to content
Snippets Groups Projects
Commit 4033fb4d authored by Timothy ODonnell
Browse files

fix

parent 89db282d
No related branches found
No related tags found
No related merge requests found
......@@ -51,8 +51,9 @@ UNSELECTED_PATH="$(mhcflurry-downloads path models_class1_pan_unselected)"
for kind in with_mass_spec no_mass_spec
do
# Model selection is always done locally. It's fast enough that it
# doesn't make sense to put it on the cluster.
# Model selection is run on the cluster, although for any reasonable
# machine it could be run locally. We run on the cluster because our
# cluster login nodes are often overloaded.
MODELS_DIR="$UNSELECTED_PATH/models.${kind}"
time mhcflurry-class1-select-pan-allele-models \
--data "$MODELS_DIR/train_data.csv.bz2" \
......@@ -60,8 +61,14 @@ do
--out-models-dir models.${kind} \
--min-models 2 \
--max-models 8 \
--num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
--verbosity 1 \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--cluster-parallelism \
--cluster-max-retries 15 \
--cluster-submit-command bsub \
--cluster-results-workdir ~/mhcflurry-scratch \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
cp "$MODELS_DIR/train_data.csv.bz2" "models.${kind}/"
# Percentile rank calibration is run on the cluster.
......@@ -72,12 +79,13 @@ do
--models-dir models.${kind} \
--match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \
--motif-summary \
--num-peptides-per-length 1000000 \
--num-peptides-per-length 100000 \
--allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
--verbosity 1 \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--prediction-batch-size 524288 \
--cluster-parallelism \
--cluster-max-retries 15 \
--cluster-submit-command bsub \
--cluster-results-workdir ~/mhcflurry-scratch \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
......
......@@ -64,7 +64,7 @@ do
--models-dir models.${kind} \
--match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \
--motif-summary \
--num-peptides-per-length 1000000 \
--num-peptides-per-length 100000 \
--allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
--verbosity 1 \
--num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
......
../models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
\ No newline at end of file
#!/bin/bash
#BSUB -J MHCf-{work_item_num} # Job name
#BSUB -P acc_nkcancer # allocation account or Unix group
#BSUB -q gpu # queue
#BSUB -R rusage[ngpus_excl_p=1] # 1 exclusive GPU
#BSUB -R span[hosts=1] # one node
#BSUB -n 1 # number of compute cores
#BSUB -W 46:00 # walltime in HH:MM
#BSUB -R rusage[mem=30000] # mb memory requested
#BSUB -o {work_dir}/%J.stdout # output log (%J : JobID)
#BSUB -eo {work_dir}/STDERR # error log
#BSUB -L /bin/bash # Initialize the execution environment
#
# LSF job-script header: the #BSUB lines above request scheduler resources,
# and the commands below prepare the shell environment for one work item.
# NOTE(review): the work_item_num and work_dir tokens in curly braces look
# like template placeholders filled in by the submitting code before the
# script is handed to bsub -- confirm against the caller. If a format-style
# substitution is used, avoid introducing any other curly braces here.

# Stop at the first failing command and trace every command that runs.
set -e
set -x
# Leave one marker line in the scheduler's error log, then send all further
# stderr output (including the set -x trace) to stdout so that a single log
# holds the whole job transcript in order.
echo "Subsequent stderr output redirected to stdout" >&2
exec 2>&1
# Per-work-item temporary directory.
# NOTE(review): nothing in this header creates the directory -- confirm that
# it already exists on the compute node or is created elsewhere.
export TMPDIR=/local/JOBS/mhcflurry-{work_item_num}
# Put the py36b conda environment's executables first on PATH.
export PATH=$HOME/.conda/envs/py36b/bin/:$PATH
# Have Python write its output unbuffered so log lines appear promptly.
export PYTHONUNBUFFERED=1
# NOTE(review): presumably asks the OpenMP runtime to print its settings at
# startup for debugging -- confirm.
export KMP_SETTINGS=1
# Record the node's memory state (in megabytes) in the job log.
free -m
# Load the pinned CUDA and cuDNN environment modules, then log what is loaded.
module add cuda/10.0.130 cudnn/7.1.1
module list
# Disabled GPU-visibility sanity check, kept for manual debugging:
# python -c 'import tensorflow as tf ; print("GPU AVAILABLE" if tf.test.is_gpu_available() else "GPU NOT AVAILABLE")'
# Dump the full environment to the log to aid post-mortem debugging.
env
# Run whatever follows this header from the work item's directory.
cd {work_dir}
......@@ -54,6 +54,7 @@ def add_cluster_parallelism_args(parser):
)
group.add_argument(
'--cluster-max-retries',
type=int,
help="How many times to rerun failing jobs. Default: %(default)s",
default=3)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment