Commit 505899a5 authored by Tim O'Donnell

Include training data subset as a hyperparameter

parent 8b9e468f
@@ -55,26 +55,33 @@ base_hyperparameters = {
     "dense_layer_l1_regularization": None,
     "batch_normalization": False,
     "dropout_probability": 0.0,
+
+    ##########################################
+    # TRAINING Data
+    ##########################################
+    "train_data": {"subset": "all"},
 }
 
 grid = []
-for minibatch_size in [32, 128, 512]:
-    for dense_layer_size in [8, 16, 32, 64, 128]:
-        for l1 in [0.0, 0.001, 0.01, 0.1]:
-            for num_lc in [0, 1, 2]:
-                for lc_kernel_size in [3, 5]:
-                    new = deepcopy(base_hyperparameters)
-                    new["layer_sizes"] = [dense_layer_size]
-                    new["dense_layer_l1_regularization"] = l1
-                    (lc_layer,) = new["locally_connected_layers"]
-                    lc_layer['kernel_size'] = lc_kernel_size
-                    if num_lc == 0:
-                        new["locally_connected_layers"] = []
-                    elif num_lc == 1:
-                        new["locally_connected_layers"] = [lc_layer]
-                    elif num_lc == 2:
-                        new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
-                    if not grid or new not in grid:
-                        grid.append(new)
+for train_subset in ["all", "quantitative"]:
+    for minibatch_size in [128, 512]:
+        for dense_layer_size in [8, 16, 32, 64]:
+            for l1 in [0.0, 0.001, 0.01]:
+                for num_lc in [0, 1, 2]:
+                    for lc_kernel_size in [3, 5]:
+                        new = deepcopy(base_hyperparameters)
+                        new["train_data"]["subset"] = train_subset
+                        new["layer_sizes"] = [dense_layer_size]
+                        new["dense_layer_l1_regularization"] = l1
+                        (lc_layer,) = new["locally_connected_layers"]
+                        lc_layer['kernel_size'] = lc_kernel_size
+                        if num_lc == 0:
+                            new["locally_connected_layers"] = []
+                        elif num_lc == 1:
+                            new["locally_connected_layers"] = [lc_layer]
+                        elif num_lc == 2:
+                            new["locally_connected_layers"] = [lc_layer, deepcopy(lc_layer)]
+                        if not grid or new not in grid:
+                            grid.append(new)
 
 dump(grid, stdout)
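
A quick sanity check on the grid this hunk emits: the six loops visit 2 × 2 × 4 × 3 × 3 × 2 = 288 combinations, but minibatch_size is never written into new in this hunk, and kernel_size is discarded when num_lc == 0, so the new not in grid test keeps only the distinct dictionaries. The arithmetic below is an illustrative sketch of that count, not part of the commit:

    train_subsets = 2       # "all", "quantitative"
    dense_sizes = 4         # 8, 16, 32, 64
    l1_values = 3           # 0.0, 0.001, 0.01
    lc_configs = 1 + 2 + 2  # num_lc == 0 (kernel irrelevant); 1 and 2, each with kernel 3 or 5
    print(train_subsets * dense_sizes * l1_values * lc_configs)  # 120 unique models
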
@@ -102,11 +102,21 @@ class Class1NeuralNetwork(object):
     Hyperparameters for early stopping.
     """
 
+    miscelaneous_hyperparameter_defaults = HyperparameterDefaults(
+        train_data={'subset': 'all', 'num_points': None},
+    )
+    """
+    Miscellaneous hyperparameters. These parameters are not used by this class
+    but may be interpreted by other code.
+    """
+
     hyperparameter_defaults = network_hyperparameter_defaults.extend(
         compile_hyperparameter_defaults).extend(
         input_encoding_hyperparameter_defaults).extend(
         fit_hyperparameter_defaults).extend(
-        early_stopping_hyperparameter_defaults)
+        early_stopping_hyperparameter_defaults).extend(
+        miscelaneous_hyperparameter_defaults
+    )
     """
     Combined set of all supported hyperparameters and their default values.
     """
@@ -403,6 +403,18 @@ def train_model(
     full_data = GLOBAL_DATA["train_data"]
     data = full_data.loc[full_data.allele == allele]
 
+    subset = hyperparameters.get("train_data", {}).get("subset", "all")
+    if subset == "quantitative":
+        data = data.loc[
+            data.measurement_type == "quantitative"
+        ]
+    elif subset == "all":
+        pass
+    else:
+        raise ValueError("Unsupported subset: %s" % subset)
+
+    hyperparameters.setdefault("train_data", {})["num_points"] = len(data)
+
     progress_preamble = (
         "[%2d / %2d hyperparameters] "
         "[%4d / %4d alleles] "
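
The filtering added to train_model is plain pandas row selection on the per-allele training DataFrame. The snippet below replays it on a toy frame; the column values are hypothetical (the filter only assumes a measurement_type column that can equal "quantitative"), so treat it as a sketch of the behavior rather than the project's real data:

    import pandas as pd

    # Hypothetical stand-in for GLOBAL_DATA["train_data"] filtered to one allele.
    data = pd.DataFrame({
        "measurement_type": ["quantitative", "qualitative", "quantitative"],
        "measurement_value": [25.0, 500.0, 11000.0],
    })
    hyperparameters = {"train_data": {"subset": "quantitative"}}

    subset = hyperparameters.get("train_data", {}).get("subset", "all")
    if subset == "quantitative":
        data = data.loc[data.measurement_type == "quantitative"]
    elif subset != "all":
        raise ValueError("Unsupported subset: %s" % subset)

    # Record the surviving point count back into the hyperparameters,
    # mirroring the commit's bookkeeping.
    hyperparameters.setdefault("train_data", {})["num_points"] = len(data)
    print(hyperparameters["train_data"])  # {'subset': 'quantitative', 'num_points': 2}
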