diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
index ec154e9a3b0ebb84411d84b316ac6b307899fc0a..03023a915083fc1c2c24a85ad1909bbb15ef7411 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.WITH_HPC_CLUSTER.sh
@@ -14,14 +14,21 @@ SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")

 mkdir -p "$SCRATCH_DIR"
-rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
-mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+if [ "$1" != "continue-incomplete" ]
+then
+    echo "Fresh run"
+    rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+    mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+else
+    echo "Continuing incomplete run"
+fi

 # Send stdout and stderr to a logfile included with the archive.
 exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
 exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)

 # Log some environment info
+echo "Invocation: $0 $@"
 date
 pip freeze
 git status
@@ -31,12 +38,22 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
 export OMP_NUM_THREADS=1
 export PYTHONUNBUFFERED=1

-cp $SCRIPT_DIR/generate_hyperparameters.py .
-python generate_hyperparameters.py > hyperparameters.yaml
+if [ "$1" != "continue-incomplete" ]
+then
+    cp $SCRIPT_DIR/generate_hyperparameters.py .
+    python generate_hyperparameters.py > hyperparameters.yaml
+fi

 for kind in with_mass_spec no_mass_spec
 do
-    echo mhcflurry-class1-train-pan-allele-models \
+    EXTRA_TRAIN_ARGS=""
+    if [ "$1" == "continue-incomplete" ] && [ -d "models.${kind}" ]
+    then
+        echo "Will continue existing run: $kind"
+        EXTRA_TRAIN_ARGS="--continue-incomplete"
+    fi
+
+    mhcflurry-class1-train-pan-allele-models \
         --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
         --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
@@ -50,26 +67,7 @@ do
         --cluster-submit-command bsub \
         --cluster-results-workdir ~/mhcflurry-scratch \
         --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf \
-        \\ > INITIALIZE.${kind}.sh
-
-    cp INITIALIZE.${kind}.sh PROCESS.${kind}.sh
-    echo "--only-initialize" >> INITIALIZE.${kind}.sh
-    echo "--continue-incomplete" >> PROCESS.${kind}.sh
-
-    bash INITIALIZE.${kind}.sh
-    echo "Done initializing."
-
-    bash PROCESS.${kind}.sh && touch $(pwd)/models.${kind}/COMPLETE || true
-    echo "Processing terminated."
-
-    # In case the above command fails, the job can may still be fixable manually.
-    # So we wait for the COMPLETE file here.
-    while [ ! -f models.${kind}/COMPLETE ]
-    do
-        echo "Waiting for $(pwd)/models.${kind}/COMPLETE"
-        echo "Processing script: $(pwd)/PROCESS.${kind}.sh"
-        sleep 60
-    done
+        $EXTRA_TRAIN_ARGS
 done

 cp $SCRIPT_ABSOLUTE_PATH .
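A minimal usage sketch for the two modes introduced above (assuming the script is invoked from downloads-generation/models_class1_pan_unselected; the invocations are illustrative, not taken from the repo's docs):

    bash GENERATE.WITH_HPC_CLUSTER.sh                      # fresh run: wipes $SCRATCH_DIR/$DOWNLOAD_NAME and regenerates hyperparameters.yaml
    bash GENERATE.WITH_HPC_CLUSTER.sh continue-incomplete  # resume: keeps the scratch dir and adds --continue-incomplete for any kind with an existing models.${kind}/
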
diff --git a/downloads-generation/models_class1_pan_unselected/GENERATE.sh b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
index 4be7648aec01ce91adae8370f3e0512fecd31dd8..511dcae67c147be841c60f16018872c81eae968b 100755
--- a/downloads-generation/models_class1_pan_unselected/GENERATE.sh
+++ b/downloads-generation/models_class1_pan_unselected/GENERATE.sh
@@ -11,8 +11,14 @@ SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "
 SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")

 mkdir -p "$SCRATCH_DIR"
-rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
-mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+if [ "$1" != "continue-incomplete" ]
+then
+    echo "Fresh run"
+    rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+    mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+else
+    echo "Continuing incomplete run"
+fi

 # Send stdout and stderr to a logfile included with the archive.
 exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
@@ -43,8 +49,21 @@ echo "Num jobs: $NUM_JOBS"

 export PYTHONUNBUFFERED=1

+if [ "$1" != "continue-incomplete" ]
+then
+    cp $SCRIPT_DIR/generate_hyperparameters.py .
+    python generate_hyperparameters.py > hyperparameters.yaml
+fi
+
 for kind in with_mass_spec no_mass_spec
 do
+    EXTRA_TRAIN_ARGS=""
+    if [ "$1" == "continue-incomplete" ] && [ -d "models.${kind}" ]
+    then
+        echo "Will continue existing run: $kind"
+        EXTRA_TRAIN_ARGS="--continue-incomplete"
+    fi
+
     mhcflurry-class1-train-pan-allele-models \
         --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
         --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
@@ -55,7 +74,8 @@ do
         --out-models-dir models.${kind} \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
         --verbosity 0 \
-        --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
+        --num-jobs $NUM_JOBS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1 \
+        $EXTRA_TRAIN_ARGS
 done

 cp $SCRIPT_ABSOLUTE_PATH .
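The same convention applies to the local GENERATE.sh. For readers unfamiliar with the pattern, the trailing $EXTRA_TRAIN_ARGS is deliberately left unquoted: an empty unquoted expansion contributes no argument at all, while a non-empty one appends the flag. A throwaway sketch of that behavior (show_args is a made-up helper, not part of the scripts):

    show_args() { echo "argc=$#"; }

    EXTRA_TRAIN_ARGS=""
    show_args --verbosity 0 $EXTRA_TRAIN_ARGS   # argc=2: the empty value vanishes

    EXTRA_TRAIN_ARGS="--continue-incomplete"
    show_args --verbosity 0 $EXTRA_TRAIN_ARGS   # argc=3: the flag is appended
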
diff --git a/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py b/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
index defa64812ca422e9352468044b20f53544066c72..8f8bf51880202a5161e9273bb0be0f4e6518ad3c 100644
--- a/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
+++ b/downloads-generation/models_class1_pan_unselected/generate_hyperparameters.py
@@ -36,7 +36,7 @@ base_hyperparameters = {
     'peptide_dense_layer_sizes': [],
     'random_negative_affinity_max': 50000.0,
     'random_negative_affinity_min': 20000.0,
-    'random_negative_constant': 25,
+    'random_negative_constant': 1500,
     'random_negative_distribution_smoothing': 0.0,
     'random_negative_match_distribution': True,
     'random_negative_rate': 0.2,
diff --git a/mhcflurry/local_parallelism.py b/mhcflurry/local_parallelism.py
index 85806b71ba0784cce08e140b31633c5e797b5537..b471f52a20b104d191455e8bd842096470950bdc 100644
--- a/mhcflurry/local_parallelism.py
+++ b/mhcflurry/local_parallelism.py
@@ -1,7 +1,7 @@
 import traceback
 import sys
 import os
-import functools
+import time
 from multiprocessing import Pool, Queue, cpu_count
 from six.moves import queue
 from multiprocessing.util import Finalize
@@ -218,12 +218,9 @@ def worker_init_entry_point(

 def worker_init(keras_backend=None, gpu_device_nums=None, worker_log_dir=None):
     if worker_log_dir:
-        sys.stderr = sys.stdout = open(
-            os.path.join(worker_log_dir, "LOG-"
-            ""
-            ""
-            ""
-            "worker.%d.txt" % os.getpid()), "w")
+        sys.stderr = sys.stdout = open(os.path.join(
+            worker_log_dir,
+            "LOG-worker.%d.%d.txt" % (os.getpid(), int(time.time()))), "w")

     # Each worker needs distinct random numbers
     numpy.random.seed()
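Two quick spot-checks for the remaining changes (hypothetical commands, run from the scratch directory the GENERATE scripts set up): the regenerated hyperparameters.yaml should carry the larger random-negative constant, and worker log filenames now embed a Unix timestamp next to the PID, so a worker restarted with a reused PID writes a new file instead of truncating the old one:

    python generate_hyperparameters.py > hyperparameters.yaml
    grep -m1 random_negative_constant hyperparameters.yaml   # expect 1500 rather than 25

    ls "$SCRATCH_DIR/$DOWNLOAD_NAME"/LOG-worker.*.txt        # files named LOG-worker.<pid>.<timestamp>.txt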