diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh
index ca8800042713c93d442797e4766ca14fc6e432e3..5bf20841c01bc63758e6ce20705658907341a556 100755
--- a/downloads-generation/models_class1_pan/GENERATE.sh
+++ b/downloads-generation/models_class1_pan/GENERATE.sh
@@ -35,6 +35,11 @@ echo "Detected GPUS: $GPUS"
 PROCESSORS=$(getconf _NPROCESSORS_ONLN)
 echo "Detected processors: $PROCESSORS"
 
+# Disable Python output buffering so log lines are written immediately.
+export PYTHONUNBUFFERED=1
+# Verbosity level forwarded to both training commands via --verbosity.
+VERBOSITY=1
+
 mhcflurry-class1-train-pan-allele-models \
     --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
     --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
@@ -42,11 +47,31 @@ mhcflurry-class1-train-pan-allele-models \
     --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
     --ensemble-size 4 \
     --hyperparameters hyperparameters.yaml \
-    --out-models-dir models-unselected \
-    --num-jobs 4 --gpus $GPUS --max-workers-per-gpu 1 --max-tasks-per-worker 1
+    --out-models-dir models-unselected.with_mass_spec \
+    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+    --verbosity "$VERBOSITY" \
+    --num-jobs "$GPUS" --max-tasks-per-worker 1 --gpus "$GPUS" --max-workers-per-gpu 1
+
+# Same training invocation against the no_mass_spec data file; output goes to models-unselected.no_mass_spec.
+mhcflurry-class1-train-pan-allele-models \
+    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" \
+    --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
+    --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
+    --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
+    --ensemble-size 4 \
+    --hyperparameters hyperparameters.yaml \
+    --out-models-dir models-unselected.no_mass_spec \
+    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+    --verbosity "$VERBOSITY" \
+    --num-jobs "$GPUS" --max-tasks-per-worker 1 --gpus "$GPUS" --max-workers-per-gpu 1
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
+# Compress each LOG-worker.*.txt file. Glob directly instead of parsing ls output; skip the unexpanded pattern when nothing matches.
+for i in LOG-worker.*.txt ; do
+    [ -e "$i" ] || continue
+    bzip2 -- "$i"
+done
 tar -cjf "../${DOWNLOAD_NAME}.with_unselected.tar.bz2" *
 
 echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.with_unselected.tar.bz2"
diff --git a/downloads-generation/models_class1_pan/generate_hyperparameters.py b/downloads-generation/models_class1_pan/generate_hyperparameters.py
index 72be989f98a54290124b5f2917f2e785ae9b9436..f07639ca06819a6695181955158ec8ce26bb0fff 100644
--- a/downloads-generation/models_class1_pan/generate_hyperparameters.py
+++ b/downloads-generation/models_class1_pan/generate_hyperparameters.py
@@ -50,7 +50,7 @@ base_hyperparameters = {
 grid = []
 for layer_sizes in [[512, 256], [512, 512],]:
     for pretrain in [True, False]:
-        for l1 in [0.0, 0.0001, 0.001, 0.01]:
+        for l1 in [0.0, 0.00001, 0.0001, 0.001]:
             new = deepcopy(base_hyperparameters)
             new["layer_sizes"] = layer_sizes
             new["dense_layer_l1_regularization"] = l1