diff --git a/README.md b/README.md index 8bfc7c7242de05890ab2fe07fc37d5537183306d..bf02a88b45a5e4d4a3de740685a082dfc4c8105c 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ notebook for an overview of the Python API, including fitting your own predictor An ensemble of eight single-allele models was trained for each allele with at least 100 measurements in the training set (118 alleles). The models were trained on a random 80% sample of the data for the allele and the remaining 20% was used for -early stopping. All models use the same [architecture](downloads-generation/models_class1/hyperparameters.json). The +early stopping. All models use the same [architecture](downloads-generation/models_class1/hyperparameters.yaml). The predictions are taken to be the geometric mean of the nM binding affinity predictions of the individual models. The training script is [here](downloads-generation/models_class1/GENERATE.sh). diff --git a/downloads-generation/models_class1/GENERATE.sh b/downloads-generation/models_class1/GENERATE.sh index ff06a043083a585eacad7c0ef8425f9cee9f9686..ae98f51ebe9a09381c3dd497d49dfcdd2643dfdb 100755 --- a/downloads-generation/models_class1/GENERATE.sh +++ b/downloads-generation/models_class1/GENERATE.sh @@ -25,11 +25,11 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME mkdir models -cp $SCRIPT_DIR/hyperparameters.json . +cp $SCRIPT_DIR/hyperparameters.yaml . 
time mhcflurry-class1-train-allele-specific-models \ --data "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" \ - --hyperparameters hyperparameters.json \ + --hyperparameters hyperparameters.yaml \ --out-models-dir models \ --min-measurements-per-allele 200 diff --git a/downloads-generation/models_class1/hyperparameters.json b/downloads-generation/models_class1/hyperparameters.json deleted file mode 100644 index 75fd5d646d05fd3a655aea0b42527f18892c44cc..0000000000000000000000000000000000000000 --- a/downloads-generation/models_class1/hyperparameters.json +++ /dev/null @@ -1,37 +0,0 @@ -[ - { - "n_models": 12, - "max_epochs": 500, - "patience": 10, - "early_stopping": true, - "validation_split": 0.2, - - "random_negative_rate": 0.0, - "random_negative_constant": 25, - - "use_embedding": false, - "kmer_size": 15, - "batch_normalization": false, - "locally_connected_layers": [ - { - "filters": 8, - "activation": "tanh", - "kernel_size": 3 - }, - { - "filters": 8, - "activation": "tanh", - "kernel_size": 3 - } - ], - "activation": "relu", - "output_activation": "sigmoid", - "layer_sizes": [ - 32 - ], - "random_negative_affinity_min": 20000.0, - "random_negative_affinity_max": 50000.0, - "dense_layer_l1_regularization": 0.001, - "dropout_probability": 0.0 - } -] diff --git a/downloads-generation/models_class1/hyperparameters.yaml b/downloads-generation/models_class1/hyperparameters.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e114b320231a585d8ef78df925caff74dbf43c6 --- /dev/null +++ b/downloads-generation/models_class1/hyperparameters.yaml @@ -0,0 +1,54 @@ +[{ +########################################## +# ENSEMBLE SIZE +########################################## +"n_models": 12, + +########################################## +# OPTIMIZATION +########################################## +"max_epochs": 500, +"patience": 10, +"early_stopping": true, +"validation_split": 0.2, + +########################################## +# 
RANDOM NEGATIVE PEPTIDES +########################################## +"random_negative_rate": 0.0, +"random_negative_constant": 25, +"random_negative_affinity_min": 20000.0, +"random_negative_affinity_max": 50000.0, + +########################################## +# PEPTIDE REPRESENTATION +########################################## +# One of "one-hot", "embedding", or "BLOSUM62". +"peptide_amino_acid_encoding": "BLOSUM62", +"use_embedding": false, # maintained for backward compatibility +"kmer_size": 15, + +########################################## +# NEURAL NETWORK ARCHITECTURE +########################################## +"locally_connected_layers": [ + { + "filters": 8, + "activation": "tanh", + "kernel_size": 3 + }, + { + "filters": 8, + "activation": "tanh", + "kernel_size": 3 + } +], +"activation": "relu", +"output_activation": "sigmoid", +"layer_sizes": [ + 32 +], +"dense_layer_l1_regularization": 0.001, +"batch_normalization": false, +"dropout_probability": 0.0, +}]