Skip to content
Snippets Groups Projects
Commit 26471d29 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

Build Class1AffinityPredictor.manifest_df lazily

parent d7e3af61
No related branches found
No related tags found
No related merge requests found
......@@ -86,8 +86,20 @@ class Class1AffinityPredictor(object):
self.allele_to_allele_specific_models = allele_to_allele_specific_models
self.class1_pan_allele_models = class1_pan_allele_models
self.allele_to_fixed_length_sequence = allele_to_fixed_length_sequence
self._manifest_df = manifest_df
if manifest_df is None:
if not allele_to_percent_rank_transform:
allele_to_percent_rank_transform = {}
self.allele_to_percent_rank_transform = allele_to_percent_rank_transform
self.metadata_dataframes = metadata_dataframes
self._cache = {}
@property
def manifest_df(self):
if self._manifest_df is None:
# Make a manifest based on
# - self.class1_pan_allele_models
# - self.allele_to_allele_specific_models
rows = []
for (i, model) in enumerate(self.class1_pan_allele_models):
rows.append((
......@@ -96,7 +108,8 @@ class Class1AffinityPredictor(object):
json.dumps(model.get_config()),
model
))
for (allele, models) in self.allele_to_allele_specific_models.items():
for (allele,
models) in self.allele_to_allele_specific_models.items():
for (i, model) in enumerate(models):
rows.append((
self.model_name(allele, i),
......@@ -104,16 +117,10 @@ class Class1AffinityPredictor(object):
json.dumps(model.get_config()),
model
))
manifest_df = pandas.DataFrame(
self._manifest_df = pandas.DataFrame(
rows,
columns=["model_name", "allele", "config_json", "model"])
self.manifest_df = manifest_df
if not allele_to_percent_rank_transform:
allele_to_percent_rank_transform = {}
self.allele_to_percent_rank_transform = allele_to_percent_rank_transform
self.metadata_dataframes = metadata_dataframes
self._cache = {}
return self._manifest_df
def clear_cache(self):
"""
......@@ -209,7 +216,7 @@ class Class1AffinityPredictor(object):
("config_json", json.dumps(model.get_config())),
("model", model),
])).to_frame().T
self.manifest_df = pandas.concat(
self._manifest_df = pandas.concat(
[self.manifest_df, row], ignore_index=True)
new_model_names.append(model_name)
......@@ -225,7 +232,7 @@ class Class1AffinityPredictor(object):
("config_json", json.dumps(model.get_config())),
("model", model),
])).to_frame().T
self.manifest_df = pandas.concat(
self._manifest_df = pandas.concat(
[self.manifest_df, row], ignore_index=True)
current_models.append(model)
new_model_names.append(model_name)
......@@ -609,7 +616,7 @@ class Class1AffinityPredictor(object):
("config_json", json.dumps(model.get_config())),
("model", model),
])).to_frame().T
self.manifest_df = pandas.concat(
self._manifest_df = pandas.concat(
[self.manifest_df, row], ignore_index=True)
self.allele_to_allele_specific_models[allele].append(model)
if models_dir_for_save:
......@@ -702,7 +709,7 @@ class Class1AffinityPredictor(object):
("config_json", json.dumps(model.get_config())),
("model", model),
])).to_frame().T
self.manifest_df = pandas.concat(
self._manifest_df = pandas.concat(
[self.manifest_df, row], ignore_index=True)
if models_dir_for_save:
self.save(
......@@ -1207,7 +1214,9 @@ class Class1AffinityPredictor(object):
Class1AffinityPredictor(
allele_to_allele_specific_models={
allele: [row.model] + existing_selected
}))
}
)
)
for (_, row) in df.iterrows()
]
......
......@@ -146,7 +146,7 @@ parser.add_argument(
parser.add_argument(
"--consensus-num-peptides-per-length",
type=int,
default=100000,
default=10000,
help="Num peptides per length to use for consensus scoring")
parser.add_argument(
"--mass-spec-regex",
......@@ -364,6 +364,13 @@ def model_select(allele):
**model_selection_kwargs)
def cache_encoding(predictor, peptides):
    """Warm the peptide-encoding cache for a predictor's networks.

    Runs ``peptides_to_network_input`` once per neural network so that
    the encoding becomes cached and later prediction calls can reuse it.

    Parameters
    ----------
    predictor : object exposing a ``neural_networks`` iterable
    peptides : peptide collection accepted by
        ``peptides_to_network_input``
    """
    networks = predictor.neural_networks
    for net in networks:
        net.peptides_to_network_input(peptides)
class CombinedModelSelector(object):
def __init__(self, model_selectors, weights=None):
if weights is None:
......@@ -396,7 +403,7 @@ class ConsensusModelSelector(object):
def __init__(
self,
predictor,
num_peptides_per_length=100000,
num_peptides_per_length=10000,
multiply_score_by_value=10.0):
(min_length, max_length) = predictor.supported_peptide_lengths
......@@ -408,11 +415,7 @@ class ConsensusModelSelector(object):
self.peptides = EncodableSequences.create(peptides)
self.predictor = predictor
self.multiply_score_by_value = multiply_score_by_value
# Encode the peptides for each neural network, so the encoding
# becomes cached.
for network in predictor.neural_networks:
network.peptides_to_network_input(self.peptides)
cache_encoding(self.predictor, self.peptides)
def usable_for_allele(self, allele):
    """Whether this selector can score models for ``allele``.

    Always True: consensus-based selection does not depend on
    allele-specific data, so every allele is supported.
    """
    return True
......@@ -513,11 +516,7 @@ class MassSpecModelSelector(object):
self.multiply_score_by_data_size = multiply_score_by_data_size
self.peptides = EncodableSequences.create(full_matrix.index.values)
# Encode the peptides for each neural network, so the encoding
# becomes cached.
for network in predictor.neural_networks:
network.peptides_to_network_input(self.peptides)
cache_encoding(self.predictor, self.peptides)
@staticmethod
def ppv(y_true, predictions):
......
Loading, or reload the page.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment