diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py index 1638075e365a13b4c1d6fcbc4b68abc5805e968f..63cf796076324a9cfcd4ba4064ddb5c68a19ad14 100644 --- a/mhcflurry/class1_affinity_predictor.py +++ b/mhcflurry/class1_affinity_predictor.py @@ -86,8 +86,20 @@ class Class1AffinityPredictor(object): self.allele_to_allele_specific_models = allele_to_allele_specific_models self.class1_pan_allele_models = class1_pan_allele_models self.allele_to_fixed_length_sequence = allele_to_fixed_length_sequence + self._manifest_df = manifest_df - if manifest_df is None: + if not allele_to_percent_rank_transform: + allele_to_percent_rank_transform = {} + self.allele_to_percent_rank_transform = allele_to_percent_rank_transform + self.metadata_dataframes = metadata_dataframes + self._cache = {} + + @property + def manifest_df(self): + if self._manifest_df is None: + # Make a manifest based on + # - self.class1_pan_allele_models + # - self.allele_to_allele_specific_models rows = [] for (i, model) in enumerate(self.class1_pan_allele_models): rows.append(( @@ -96,7 +108,8 @@ class Class1AffinityPredictor(object): json.dumps(model.get_config()), model )) - for (allele, models) in self.allele_to_allele_specific_models.items(): + for (allele, + models) in self.allele_to_allele_specific_models.items(): for (i, model) in enumerate(models): rows.append(( self.model_name(allele, i), @@ -104,16 +117,10 @@ class Class1AffinityPredictor(object): json.dumps(model.get_config()), model )) - manifest_df = pandas.DataFrame( + self._manifest_df = pandas.DataFrame( rows, columns=["model_name", "allele", "config_json", "model"]) - self.manifest_df = manifest_df - - if not allele_to_percent_rank_transform: - allele_to_percent_rank_transform = {} - self.allele_to_percent_rank_transform = allele_to_percent_rank_transform - self.metadata_dataframes = metadata_dataframes - self._cache = {} + return self._manifest_df def clear_cache(self): """ @@ -209,7 +216,7 @@ class Class1AffinityPredictor(object): ("config_json", json.dumps(model.get_config())), ("model", model), ])).to_frame().T - self.manifest_df = pandas.concat( + self._manifest_df = pandas.concat( [self.manifest_df, row], ignore_index=True) new_model_names.append(model_name) @@ -225,7 +232,7 @@ class Class1AffinityPredictor(object): ("config_json", json.dumps(model.get_config())), ("model", model), ])).to_frame().T - self.manifest_df = pandas.concat( + self._manifest_df = pandas.concat( [self.manifest_df, row], ignore_index=True) current_models.append(model) new_model_names.append(model_name) @@ -609,7 +616,7 @@ class Class1AffinityPredictor(object): ("config_json", json.dumps(model.get_config())), ("model", model), ])).to_frame().T - self.manifest_df = pandas.concat( + self._manifest_df = pandas.concat( [self.manifest_df, row], ignore_index=True) self.allele_to_allele_specific_models[allele].append(model) if models_dir_for_save: @@ -702,7 +709,7 @@ class Class1AffinityPredictor(object): ("config_json", json.dumps(model.get_config())), ("model", model), ])).to_frame().T - self.manifest_df = pandas.concat( + self._manifest_df = pandas.concat( [self.manifest_df, row], ignore_index=True) if models_dir_for_save: self.save( @@ -1207,7 +1214,9 @@ class Class1AffinityPredictor(object): Class1AffinityPredictor( allele_to_allele_specific_models={ allele: [row.model] + existing_selected - })) + } + ) + ) for (_, row) in df.iterrows() ] diff --git a/mhcflurry/select_allele_specific_models_command.py b/mhcflurry/select_allele_specific_models_command.py index 01587d7614b0d90f4b95715d3139830be03bffe3..336756193c732896d47296c95af92b1aa2469093 100644 --- a/mhcflurry/select_allele_specific_models_command.py +++ b/mhcflurry/select_allele_specific_models_command.py @@ -146,7 +146,7 @@ parser.add_argument( parser.add_argument( "--consensus-num-peptides-per-length", type=int, - default=100000, + default=10000, help="Num peptides per length to use for consensus scoring") parser.add_argument( "--mass-spec-regex", @@ -364,6 +364,13 @@ def model_select(allele): **model_selection_kwargs) +def cache_encoding(predictor, peptides): + # Encode the peptides for each neural network, so the encoding + # becomes cached. + for network in predictor.neural_networks: + network.peptides_to_network_input(peptides) + + class CombinedModelSelector(object): def __init__(self, model_selectors, weights=None): if weights is None: @@ -396,7 +403,7 @@ class ConsensusModelSelector(object): def __init__( self, predictor, - num_peptides_per_length=100000, + num_peptides_per_length=10000, multiply_score_by_value=10.0): (min_length, max_length) = predictor.supported_peptide_lengths @@ -408,11 +415,7 @@ class ConsensusModelSelector(object): self.peptides = EncodableSequences.create(peptides) self.predictor = predictor self.multiply_score_by_value = multiply_score_by_value - - # Encode the peptides for each neural network, so the encoding - # becomes cached. - for network in predictor.neural_networks: - network.peptides_to_network_input(self.peptides) + cache_encoding(self.predictor, self.peptides) def usable_for_allele(self, allele): return True @@ -513,11 +516,7 @@ class MassSpecModelSelector(object): self.multiply_score_by_data_size = multiply_score_by_data_size self.peptides = EncodableSequences.create(full_matrix.index.values) - - # Encode the peptides for each neural network, so the encoding - # becomes cached. - for network in predictor.neural_networks: - network.peptides_to_network_input(self.peptides) + cache_encoding(self.predictor, self.peptides) @staticmethod def ppv(y_true, predictions):