diff --git a/downloads-generation/models_class1/generate_hyperparameters.py b/downloads-generation/models_class1/generate_hyperparameters.py
index c130471f42e62fb9424ec1e5535c8cfc295337c2..457678799b5b4101e76ee944c6963cd784e5fc72 100644
--- a/downloads-generation/models_class1/generate_hyperparameters.py
+++ b/downloads-generation/models_class1/generate_hyperparameters.py
@@ -55,26 +55,33 @@
     "dense_layer_l1_regularization": None,
     "batch_normalization": False,
     "dropout_probability": 0.0,
+
+    ##########################################
+    # TRAINING DATA
+    ##########################################
+    "train_data": {"subset": "all"},
 }
 
 grid = []
-for minibatch_size in [32, 128, 512]:
-    for dense_layer_size in [8, 16, 32, 64, 128]:
-        for l1 in [0.0, 0.001, 0.01, 0.1]:
-            for num_lc in [0, 1, 2]:
-                for lc_kernel_size in [3, 5]:
-                    new = deepcopy(base_hyperparameters)
-                    new["layer_sizes"] = [dense_layer_size]
-                    new["dense_layer_l1_regularization"] = l1
-                    (lc_layer,) = new["locally_connected_layers"]
-                    lc_layer['kernel_size'] = lc_kernel_size
-                    if num_lc == 0:
-                        new["locally_connected_layers"] = []
-                    elif num_lc == 1:
-                        new["locally_connected_layers"] = [lc_layer]
-                    elif num_lc == 2:
-                        new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
-                    if not grid or new not in grid:
-                        grid.append(new)
+for train_subset in ["all", "quantitative"]:
+    for minibatch_size in [128, 512]:
+        for dense_layer_size in [8, 16, 32, 64]:
+            for l1 in [0.0, 0.001, 0.01]:
+                for num_lc in [0, 1, 2]:
+                    for lc_kernel_size in [3, 5]:
+                        new = deepcopy(base_hyperparameters)
+                        new["train_data"]["subset"] = train_subset
+                        new["layer_sizes"] = [dense_layer_size]
+                        new["dense_layer_l1_regularization"] = l1
+                        (lc_layer,) = new["locally_connected_layers"]
+                        lc_layer['kernel_size'] = lc_kernel_size
+                        if num_lc == 0:
+                            new["locally_connected_layers"] = []
+                        elif num_lc == 1:
+                            new["locally_connected_layers"] = [lc_layer]
+                        elif num_lc == 2:
+                            new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
+                        if not grid or new not in grid:
+                            grid.append(new)
 
 dump(grid, stdout)
diff --git a/mhcflurry/class1_neural_network.py b/mhcflurry/class1_neural_network.py
index 4b9e78783db73b4e4fcf43e8104814a22af862ac..4bc8331b165bbee85ddc4c3272659fdf25bab1c3 100644
--- a/mhcflurry/class1_neural_network.py
+++ b/mhcflurry/class1_neural_network.py
@@ -102,11 +102,21 @@ class Class1NeuralNetwork(object):
     Hyperparameters for early stopping.
     """
 
+    miscellaneous_hyperparameter_defaults = HyperparameterDefaults(
+        train_data={'subset': 'all', 'num_points': None},
+    )
+    """
+    Miscellaneous hyperparameters. These parameters are not used by this
+    class but may be interpreted by other code.
+    """
+
     hyperparameter_defaults = network_hyperparameter_defaults.extend(
         compile_hyperparameter_defaults).extend(
         input_encoding_hyperparameter_defaults).extend(
         fit_hyperparameter_defaults).extend(
-        early_stopping_hyperparameter_defaults)
+        early_stopping_hyperparameter_defaults).extend(
+        miscellaneous_hyperparameter_defaults
+    )
     """
     Combined set of all supported hyperparameters and their default values.
""" diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py index cd5505c133a8e95007d225cd72e3ab227163f10d..40717a7d76b30beb45797b727eb975ceb416fcbb 100644 --- a/mhcflurry/train_allele_specific_models_command.py +++ b/mhcflurry/train_allele_specific_models_command.py @@ -403,6 +403,18 @@ def train_model( full_data = GLOBAL_DATA["train_data"] data = full_data.loc[full_data.allele == allele] + subset = hyperparameters.get("train_data", {}).get("subset", "all") + if subset == "quantitative": + data = data.loc[ + data.measurement_type == "quantitative" + ] + elif subset == "all": + pass + else: + raise ValueError("Unsupported subset: %s" % subset) + + hyperparameters.setdefault("train_data", {})["num_points"] = len(data) + progress_preamble = ( "[%2d / %2d hyperparameters] " "[%4d / %4d alleles] "