Commit 4033fb4d authored by Timothy ODonnell

fix

parent 89db282d
@@ -51,8 +51,9 @@ UNSELECTED_PATH="$(mhcflurry-downloads path models_class1_pan_unselected)"
 for kind in with_mass_spec no_mass_spec
 do
-    # Model selection is always done locally. It's fast enough that it
-    # doesn't make sense to put it on the cluster.
+    # Model selection is run on the cluster, although for any reasonable
+    # machine it could be run locally. We run on the cluster because our
+    # cluster login nodes are often overloaded.
     MODELS_DIR="$UNSELECTED_PATH/models.${kind}"
     time mhcflurry-class1-select-pan-allele-models \
         --data "$MODELS_DIR/train_data.csv.bz2" \
@@ -60,8 +61,14 @@ do
         --out-models-dir models.${kind} \
         --min-models 2 \
         --max-models 8 \
-        --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
+        --verbosity 1 \
+        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+        --cluster-parallelism \
+        --cluster-max-retries 15 \
+        --cluster-submit-command bsub \
+        --cluster-results-workdir ~/mhcflurry-scratch \
+        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
     cp "$MODELS_DIR/train_data.csv.bz2" "models.${kind}/"
     # Percentile rank calibration is run on the cluster.
@@ -72,12 +79,13 @@ do
         --models-dir models.${kind} \
         --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \
         --motif-summary \
-        --num-peptides-per-length 1000000 \
+        --num-peptides-per-length 100000 \
         --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
         --verbosity 1 \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
         --prediction-batch-size 524288 \
         --cluster-parallelism \
+        --cluster-max-retries 15 \
         --cluster-submit-command bsub \
         --cluster-results-workdir ~/mhcflurry-scratch \
         --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
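The new --cluster-* flags above switch model selection from local multiprocessing to cluster submission: each work item gets a directory under --cluster-results-workdir, a job script is formed by prepending the header file given by --cluster-script-prefix-path, and the script is piped to --cluster-submit-command (LSF's bsub here), with failed jobs resubmitted up to --cluster-max-retries times. A minimal sketch of that submission flow follows; the helper and worker names are hypothetical illustrations, not mhcflurry's internal API:

    # Sketch only: shows how a site header plus a worker command could be
    # piped to bsub. submit_work_item and the worker command are made up.
    import subprocess

    def submit_work_item(work_item_num, work_dir, prefix_path, submit_command="bsub"):
        # Fill the {work_item_num} / {work_dir} placeholders in the site header.
        header = open(prefix_path).read().format(
            work_item_num=work_item_num, work_dir=work_dir)
        worker = "some-worker-entry-point '%s'" % work_dir  # stand-in command
        script = header + "\n" + worker + "\n"
        # bsub reads the job script from stdin, equivalent to `bsub < job.lsf`.
        subprocess.run([submit_command], input=script.encode(), check=True)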
@@ -64,7 +64,7 @@ do
         --models-dir models.${kind} \
         --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \
         --motif-summary \
-        --num-peptides-per-length 1000000 \
+        --num-peptides-per-length 100000 \
         --allele $(bzcat "$MODELS_DIR/train_data.csv.bz2" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) \
         --verbosity 1 \
         --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
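Both calibration commands also drop --num-peptides-per-length from 1,000,000 to 100,000, a 10x cut in the random-peptide workload per allele. Rough scale, assuming calibration covers the usual class I peptide lengths 8 through 15 (an assumption, not something stated in this diff):

    # Back-of-envelope calibration cost per allele (lengths 8-15 assumed).
    lengths = range(8, 16)
    print(1_000_000 * len(lengths))  # 8,000,000 predictions per allele before
    print(100_000 * len(lengths))    # 800,000 after: 10x less work per allele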
-../models_class1_pan_unselected/cluster_submit_script_header.mssm_hpc.lsf
\ No newline at end of file
+#!/bin/bash
+#BSUB -J MHCf-{work_item_num}     # Job name
+#BSUB -P acc_nkcancer             # Allocation account or Unix group
+#BSUB -q gpu                      # Queue
+#BSUB -R rusage[ngpus_excl_p=1]   # 1 exclusive GPU
+#BSUB -R span[hosts=1]            # One node
+#BSUB -n 1                        # Number of compute cores
+#BSUB -W 46:00                    # Walltime in HH:MM
+#BSUB -R rusage[mem=30000]        # Memory requested, in MB
+#BSUB -o {work_dir}/%J.stdout     # Output log (%J: job ID)
+#BSUB -eo {work_dir}/STDERR       # Error log
+#BSUB -L /bin/bash                # Initialize the execution environment
+#
+set -e
+set -x
+echo "Subsequent stderr output redirected to stdout" >&2
+exec 2>&1
+export TMPDIR=/local/JOBS/mhcflurry-{work_item_num}
+export PATH=$HOME/.conda/envs/py36b/bin/:$PATH
+export PYTHONUNBUFFERED=1
+export KMP_SETTINGS=1
+free -m
+module add cuda/10.0.130 cudnn/7.1.1
+module list
+# python -c 'import tensorflow as tf ; print("GPU AVAILABLE" if tf.test.is_gpu_available() else "GPU NOT AVAILABLE")'
+env
+cd {work_dir}
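This change replaces the old file, a symlink to the models_class1_pan_unselected copy, with a standalone LSF header. In it, {work_item_num} and {work_dir} are str.format-style placeholders filled in per job, and exec 2>&1 merges stderr into stdout so each job leaves a single %J.stdout log. A quick way to check that a site-specific header renders cleanly before pointing --cluster-script-prefix-path at it (a sketch; the sample values are arbitrary):

    # Render the LSF header with sample values to verify the placeholders.
    template = open("cluster_submit_script_header.mssm_hpc.lsf").read()
    print(template.format(work_item_num=0, work_dir="/tmp/mhcflurry-smoke-test"))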
@@ -54,6 +54,7 @@ def add_cluster_parallelism_args(parser):
     )
     group.add_argument(
         '--cluster-max-retries',
+        type=int,
         help="How many times to rerun failing jobs. Default: %(default)s",
         default=3)
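The type=int addition matters because argparse applies type conversion only to values parsed from the command line: without it, --cluster-max-retries 15 arrives as the string '15' while the default stays the integer 3, so retry-count comparisons fail exactly when the flag is actually used. A minimal reproduction:

    import argparse

    parser = argparse.ArgumentParser()
    # As in the pre-fix code: no type=int, so command-line values stay strings.
    parser.add_argument('--cluster-max-retries', default=3)
    args = parser.parse_args(['--cluster-max-retries', '15'])
    print(repr(args.cluster_max_retries))  # '15' -- a str; the default is int 3

    try:
        0 < args.cluster_max_retries  # e.g. a retry-counter comparison
    except TypeError as exc:
        print("breaks only when the flag is passed:", exc)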