From f2858cff2b243a46ff33631a5dfbcacac52330d0 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Wed, 12 Jun 2019 08:58:51 -0400
Subject: [PATCH] fix GENERATE.sh

---
 .../models_class1_pan/GENERATE.sh             | 14 ++++++--------
 .../generate_hyperparameters.py               | 16 +++++++++-------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh
index 8724ed3f..ca880004 100755
--- a/downloads-generation/models_class1_pan/GENERATE.sh
+++ b/downloads-generation/models_class1_pan/GENERATE.sh
@@ -15,8 +15,8 @@ rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
 mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
 
 # Send stdout and stderr to a logfile included with the archive.
-#exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
-#exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
 
 # Log some environment info
 date
@@ -35,7 +35,7 @@ echo "Detected GPUS: $GPUS"
 PROCESSORS=$(getconf _NPROCESSORS_ONLN)
 echo "Detected processors: $PROCESSORS"
 
-time mhcflurry-class1-train-pan-allele-models \
+mhcflurry-class1-train-pan-allele-models \
     --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
     --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
     --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
@@ -43,14 +43,12 @@ time mhcflurry-class1-train-pan-allele-models \
     --ensemble-size 4 \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models-unselected \
-
-
-    #--num-jobs $(expr $PROCESSORS \* 2) --gpus $GPUS --max-workers-per-gpu 2 --max-tasks-per-worker 50
+    --num-jobs 4 --gpus $GPUS --max-workers-per-gpu 1 --max-tasks-per-worker 1
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.with_unselected.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.with_unselected.tar.bz2"
 
-ls * | grep -v models-unselected | xargs -I {} tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" {}
-echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
\ No newline at end of file
+ls -d * | grep -v models-unselected | xargs -I {} tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" {}
+echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
diff --git a/downloads-generation/models_class1_pan/generate_hyperparameters.py b/downloads-generation/models_class1_pan/generate_hyperparameters.py
index ea728dc3..72be989f 100644
--- a/downloads-generation/models_class1_pan/generate_hyperparameters.py
+++ b/downloads-generation/models_class1_pan/generate_hyperparameters.py
@@ -48,12 +48,14 @@ base_hyperparameters = {
 }
 
 grid = []
-for layer_sizes in [[512, 256], [1024, 512], [1024, 1024]]:
-    for l1 in [0.0, 0.0001, 0.001, 0.01]:
-        new = deepcopy(base_hyperparameters)
-        new["layer_sizes"] = layer_sizes
-        new["dense_layer_l1_regularization"] = l1
-        if not grid or new not in grid:
-            grid.append(new)
+for layer_sizes in [[512, 256], [512, 512],]:
+    for pretrain in [True, False]:
+        for l1 in [0.0, 0.0001, 0.001, 0.01]:
+            new = deepcopy(base_hyperparameters)
+            new["layer_sizes"] = layer_sizes
+            new["dense_layer_l1_regularization"] = l1
+            new["train_data"]["pretrain"] = pretrain
+            if not grid or new not in grid:
+                grid.append(new)
 
 dump(grid, stdout)
--
GitLab
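
Note on the generate_hyperparameters.py hunk: the sketch below is a minimal, self-contained rendering of the revised grid loop, not the full script. The base_hyperparameters stub is a placeholder carrying only the keys the loop touches (the real dict defines many more fields and already includes the "train_data" sub-dict); the loop body matches the added lines and yields 2 layer sizes x 2 pretrain settings x 4 L1 strengths = 16 configurations.

# Sketch of the revised hyperparameter grid construction (assumptions noted above).
from copy import deepcopy

base_hyperparameters = {
    # Placeholder stub: the real base_hyperparameters defines many more fields.
    "layer_sizes": [512, 256],
    "dense_layer_l1_regularization": 0.0,
    "train_data": {"pretrain": True},
}

grid = []
for layer_sizes in [[512, 256], [512, 512]]:
    for pretrain in [True, False]:
        for l1 in [0.0, 0.0001, 0.001, 0.01]:
            new = deepcopy(base_hyperparameters)
            new["layer_sizes"] = layer_sizes
            new["dense_layer_l1_regularization"] = l1
            new["train_data"]["pretrain"] = pretrain
            if not grid or new not in grid:  # skip exact duplicate configurations
                grid.append(new)

# 2 layer sizes x 2 pretrain settings x 4 L1 strengths = 16 configurations
print(len(grid))  # -> 16

Enumerating pretrain inside the grid, rather than fixing it in the base hyperparameters, presumably lets the later model-selection step compare pretrained and non-pretrained variants directly.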