Commit 95de1b69 authored by Alex Rubinsteyn

don't add --max-ic50 to args twice

parent f7039fb4
@@ -39,18 +39,6 @@ def add_imputation_argument_to_parser(parser):
         help="Use the given imputation method to generate data for pre-training models")
     return parser
 
-def add_max_ic50_argument_to_parser(parser):
-    """
-    Extends an argument parser with --max-ic50
-    """
-    parser.add_argument(
-        "--max-ic50",
-        type=float,
-        default=MAX_IC50,
-        help="Largest IC50 represented by neural network output. "
-        "Default: %(default)s")
-    return parser
 
 def add_hyperparameter_arguments_to_parser(parser):
     """
@@ -104,8 +92,8 @@ def add_hyperparameter_arguments_to_parser(parser):
         "--max-ic50",
         type=float,
         default=MAX_IC50,
-        help="Largest IC50 value representable as output of neural network")
+        help="Largest IC50 represented by neural network output. "
+        "Default: %(default)s")
     return parser
 
 def add_training_arguments_to_parser(parser):
@@ -160,7 +148,6 @@ def add_arguments_to_parser(parser):
     functions = [
         add_hyperparameter_arguments_to_parser,
         add_training_arguments_to_parser,
-        add_max_ic50_argument_to_parser,
         add_imputation_argument_to_parser,
     ]
     for fn in functions:
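Background on the fix above: with argparse's default conflict handler, registering the same option string twice on one parser raises argparse.ArgumentError, which is why --max-ic50 now lives only in add_hyperparameter_arguments_to_parser. A minimal standalone sketch of that behavior, using only the standard library (the MAX_IC50 value below is an illustrative stand-in, not necessarily the project's constant):

    import argparse

    MAX_IC50 = 50000.0  # illustrative stand-in for the project's MAX_IC50 constant

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-ic50",
        type=float,
        default=MAX_IC50,
        help="Largest IC50 represented by neural network output. "
        "Default: %(default)s")

    # Adding the same option string again would raise argparse.ArgumentError:
    # parser.add_argument("--max-ic50", type=float)

    args = parser.parse_args(["--max-ic50", "20000"])
    print(args.max_ic50)  # -> 20000.0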
@@ -171,6 +171,16 @@ class Class1AlleleSpecificKmerIC50PredictorBase(IC50PredictorBase):
         Extra arguments are passed on to the fit_encoded_kmer_arrays()
         method.
         """
+        if len(dataset.unique_alleles()) > 1:
+            raise ValueError(
+                "Allele-specific predictor can't be trained on multi-allele data: %s" % (
+                    dataset,))
+        if pretraining_dataset and len(pretraining_dataset.unique_alleles()) > 1:
+            raise ValueError(
+                "Allele-specific predictor can't pretrain on data from multiple alleles: %s" %
+                (pretraining_dataset,))
         X, ic50, sample_weights, original_peptide_indices = \
             dataset.kmer_index_encoding(
                 kmer_size=self.kmer_size,
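The validation added above only relies on the dataset exposing unique_alleles(). A toy sketch of the same single-allele guard, with a stand-in object (FakeDataset and check_single_allele are illustrative, not part of the project's API):

    class FakeDataset(object):
        def __init__(self, alleles):
            self.alleles = alleles

        def unique_alleles(self):
            return set(self.alleles)

    def check_single_allele(dataset):
        # mirrors the check above: allele-specific models train on one allele only
        if len(dataset.unique_alleles()) > 1:
            raise ValueError(
                "Allele-specific predictor can't be trained on multi-allele data: %s" % (
                    dataset,))

    check_single_allele(FakeDataset(["HLA-A0201"]))  # passes silently
    # check_single_allele(FakeDataset(["HLA-A0201", "HLA-B0702"]))  # would raise ValueError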
# Copyright (c) 2015. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import (
    print_function,
    division,
    absolute_import,
)
from collections import OrderedDict

import numpy as np


class ScoreSet(object):
    """
    Useful for keeping a collection of score dictionaries
    which map name->score type->list of values.
    """
    def __init__(self, verbose=True, index="name"):
        self.groups = {}
        self.verbose = verbose
        if isinstance(index, (list, tuple)):
            index = ",".join("%s" % item for item in index)
        self.index = index

    def add_many(self, group, **kwargs):
        for (k, v) in sorted(kwargs.items()):
            self.add(group, k, v)

    def add(self, group, score_type, value):
        if isinstance(group, (list, tuple)):
            group = ",".join("%s" % item for item in group)
        if group not in self.groups:
            self.groups[group] = {}
        if score_type not in self.groups[group]:
            self.groups[group][score_type] = []
        self.groups[group][score_type].append(value)
        if self.verbose:
            print("--> %s:%s %0.4f" % (group, score_type, value))

    def score_types(self):
        result = set([])
        for (g, d) in sorted(self.groups.items()):
            for score_type in sorted(d.keys()):
                result.add(score_type)
        return list(sorted(result))

    def _reduce_scores(self, reduce_fn):
        score_types = self.score_types()
        return {
            group: OrderedDict([
                (score_type, reduce_fn(score_dict[score_type]))
                for score_type in score_types
            ])
            for (group, score_dict) in self.groups.items()
        }

    def averages(self):
        return self._reduce_scores(np.mean)

    def stds(self):
        return self._reduce_scores(np.std)

    def to_csv(self, filename):
        with open(filename, "w") as f:
            header_list = [self.index]
            score_types = self.score_types()
            for score_type in score_types:
                header_list.append(score_type)
                header_list.append(score_type + "_std")
            header_line = ",".join(header_list) + "\n"
            if self.verbose:
                print(header_line)
            f.write(header_line)
            score_averages = self.averages()
            score_stds = self.stds()
            for name in sorted(score_averages.keys()):
                line_elements = [name]
                for score_type in score_types:
                    line_elements.append(
                        "%0.4f" % score_averages[name][score_type])
                    line_elements.append(
                        "%0.4f" % score_stds[name][score_type])
                line = ",".join(line_elements) + "\n"
                if self.verbose:
                    print(line)
                f.write(line)
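Assuming the ScoreSet class above is importable (its module path isn't visible on this page), a usage sketch consistent with its methods would be:

    scores = ScoreSet(verbose=False, index="allele")

    # accumulate repeated measurements of the same metrics for one group
    scores.add_many("HLA-A0201", auc=0.91, f1=0.62)
    scores.add_many("HLA-A0201", auc=0.89, f1=0.65)

    print(scores.averages())  # e.g. {'HLA-A0201': OrderedDict([('auc', 0.9...), ('f1', 0.635)])}
    print(scores.stds())

    # writes one row per group: the index column, then <score>, <score>_std pairs
    scores.to_csv("scores.csv")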
@@ -113,7 +113,7 @@ def subsample_performance(
     for i, n_train in enumerate(sample_sizes):
         for _ in range(n_repeats_per_size):
             if imputer is None:
-                dataset_train, dataset_test = dataset.random_split(n_train)
+                dataset_train, dataset_test = dataset_allele.random_split(n_train)
                 dataset_imputed = None
             else:
                 dataset_train, dataset_imputed, dataset_test = \
@@ -128,6 +128,9 @@
                     allele,
                     n_train,
                     n_total))
+            print("-- Train", dataset_train)
+            print("-- Imputed", dataset_imputed)
+            print("-- Test", dataset_test)
 
         # pick a fraction on a log-scale from the minimum to maximum number
         # of samples
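The truncated comment at the end of this hunk refers to picking sample counts on a log scale between the minimum and maximum number of samples; the code that does so isn't shown here, but one common way to build such a schedule with numpy (illustrative only, not necessarily how this module does it) is:

    import numpy as np

    def log_spaced_sample_sizes(n_min, n_max, n_sizes):
        # geometrically spaced counts between n_min and n_max, rounded and de-duplicated
        sizes = np.geomspace(n_min, n_max, num=n_sizes)
        return sorted(set(int(round(s)) for s in sizes))

    print(log_spaced_sample_sizes(10, 1000, 5))  # -> [10, 32, 100, 316, 1000]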