diff --git a/mhcflurry/class1_neural_network.py b/mhcflurry/class1_neural_network.py index 7bbdf4b55b8fbdcfbcbb7a48e20b7f01df58b8de..f23ca9773da75b80ec07304e431c4dfec47fa8e8 100644 --- a/mhcflurry/class1_neural_network.py +++ b/mhcflurry/class1_neural_network.py @@ -32,8 +32,7 @@ class Class1NeuralNetwork(object): """ Low level class I predictor consisting of a single neural network. - Both single allele and pan-allele prediction are supported, but pan-allele - is in development and not yet well performing. + Both single allele and pan-allele prediction are supported. Users will generally use Class1AffinityPredictor, which gives a higher-level interface and supports ensembles. @@ -269,12 +268,30 @@ class Class1NeuralNetwork(object): return self._network def update_network_description(self): + """ + Update self.network_json and self.network_weights properties based on + this instances's neural network. + """ if self._network is not None: self.network_json = self._network.to_json() self.network_weights = self._network.get_weights() @staticmethod def keras_network_cache_key(network_json): + """ + Given a Keras JSON description of a neural network, return a key that + uniquely defines this network. Networks that share the same key should + have compatible weights matrices and give the same prediction outputs + when their weights are the same. + + Parameters + ---------- + network_json : string + + Returns + ------- + string + """ # As an optimization, we remove anything about regularization as these # do not affect predictions. def drop_properties(d): @@ -427,6 +444,9 @@ class Class1NeuralNetwork(object): Returns ------- (numpy.array, numpy.array) + + Indices and allele representations. + """ return ( allele_encoding.indices, @@ -444,11 +464,13 @@ class Class1NeuralNetwork(object): Parameters ---------- - method - - Returns - ------- - + network : keras.Model + x_dict : dict of string -> numpy.ndarray + Training data as would be passed keras.Model.fit(). + method : string + Initialization method. Currently only "lsuv" is supported. + verbose : int + Status updates printed to stdout if verbose > 0 """ if verbose: print("Performing data-dependent init: ", method) @@ -479,27 +501,32 @@ class Class1NeuralNetwork(object): Fit using a generator. Does not support many of the features of fit(), such as random negative peptides. + Fitting proceeds until early stopping is hit, using the peptides, + affinities, etc. given by the parameters starting with "validation_". + + This is used for pre-training pan-allele models using data synthesized + by the allele-specific models. + Parameters ---------- generator : generator yielding (alleles, peptides, affinities) tuples where alleles and peptides are lists of strings, and affinities is list of floats. - - validation_peptide_encoding - validation_affinities - validation_allele_encoding - validation_inequalities - validation_output_indices - steps_per_epoch - epochs - patience - verbose - - Returns - ------- - + validation_peptide_encoding : EncodableSequences + validation_affinities : list of float + validation_allele_encoding : AlleleEncoding + validation_inequalities : list of string + validation_output_indices : list of int + steps_per_epoch : int + epochs : int + min_epochs : int + patience : int + min_delta : float + verbose : int + progress_callback : thunk + progress_preamble : string + progress_print_interval : float """ - import keras from keras import backend as K fit_info = collections.defaultdict(list) @@ -668,7 +695,7 @@ class Class1NeuralNetwork(object): affinities : list of float nM affinities. Must be same length of as peptides. - allele_encoding : AlleleEncoding, optional + allele_encoding : AlleleEncoding If not specified, the model will be a single-allele predictor. inequalities : list of string, each element one of ">", "<", or "=". @@ -676,21 +703,27 @@ class Class1NeuralNetwork(object): Each element must be one of ">", "<", or "=". For example, a ">" will train on y_pred > y_true for that element in the training set. Requires using a custom losses that support inequalities (e.g. - mse_with_ineqalities). - If None all inequalities are taken to be "=". - - sample_weights : list of float, optional + mse_with_ineqalities). If None all inequalities are taken to be "=". + + output_indices : list of int + For multi-output models only. Same length as affinities. Indicates + the index of the output (starting from 0) for each training example. + + sample_weights : list of float If not specified, all samples (including random negatives added during training) will have equal weight. If specified, the random negatives will be assigned weight=1.0. - shuffle_permutation : list of int, optional + shuffle_permutation : list of int Permutation (integer list) of same length as peptides and affinities If None, then a random permutation will be generated. verbose : int Keras verbosity level + progress_callback : function + No-argument function to call after each epoch. + progress_preamble : string Optional string of information to include in each progress update @@ -752,8 +785,8 @@ class Class1NeuralNetwork(object): x_dict_without_random_negatives['allele'] = allele_encoding_input # Shuffle y_values and the contents of x_dict_without_random_negatives - # This ensures different data is used for the test set for early stopping - # when multiple models are trained. + # This ensures different data is used for the test set for early + # stopping when multiple models are trained. if shuffle_permutation is None: shuffle_permutation = numpy.random.permutation(len(y_values)) y_values = y_values[shuffle_permutation] @@ -810,7 +843,9 @@ class Class1NeuralNetwork(object): if loss.supports_inequalities: # Do not sample negative affinities: just use an inequality. - random_negative_ic50 = self.hyperparameters['random_negative_affinity_min'] + random_negative_ic50 = self.hyperparameters[ + 'random_negative_affinity_min' + ] random_negative_target = from_ic50(random_negative_ic50) y_dict_with_random_negatives = { @@ -1018,9 +1053,10 @@ class Class1NeuralNetwork(object): Predict affinities. If peptides are specified as EncodableSequences, then the predictions - will be cached for this predictor as long as the EncodableSequences object - remains in memory. The cache is keyed in the object identity of the - EncodableSequences, not the sequences themselves. + will be cached for this predictor as long as the EncodableSequences + object remains in memory. The cache is keyed in the object identity of + the EncodableSequences, not the sequences themselves. The cache is used + only for allele-specific models (i.e. when allele_encoding is None). Parameters ---------- @@ -1032,6 +1068,10 @@ class Class1NeuralNetwork(object): batch_size : int batch_size passed to Keras + output_index : int or None + For multi-output models. Gives the output index to return. If set to + None, then all outputs are returned as a samples x outputs matrix. + Returns ------- numpy.array of nM affinity predictions @@ -1070,7 +1110,7 @@ class Class1NeuralNetwork(object): Merge multiple models at the tensorflow (or other backend) level. Only certain neural network architectures support merging. Others will - throw NotImplementedError. + result in a NotImplementedError. Parameters ---------- @@ -1091,12 +1131,6 @@ class Class1NeuralNetwork(object): if len(models) == 1: return models[0] - - # Copy models since we are going to mutate their underlying networks - models = [ - pickle.loads(pickle.dumps(model, protocol=pickle.HIGHEST_PROTOCOL)) - for model in models - ] assert len(models) > 1 result = Class1NeuralNetwork(**dict(models[0].hyperparameters)) @@ -1118,13 +1152,13 @@ class Class1NeuralNetwork(object): for network in networks ] - pan_allele_layer_names1 = [ + pan_allele_layer_names = [ 'allele', 'peptide', 'allele_representation', 'flattened_0', 'allele_flat', 'allele_peptide_merged', 'dense_0', 'dropout_0', 'dense_1', 'dropout_1', 'output', ] - if all(names == pan_allele_layer_names1 for names in layer_names): + if all(names == pan_allele_layer_names for names in layer_names): # Merging an ensemble of pan-allele architectures network = networks[0] peptide_input = Input( @@ -1146,7 +1180,7 @@ class Class1NeuralNetwork(object): sub_networks = [] for (i, network) in enumerate(networks): layers = network.layers[ - pan_allele_layer_names1.index("allele_peptide_merged") + 1: + pan_allele_layer_names.index("allele_peptide_merged") + 1: ] node = allele_peptide_merged for layer in layers: @@ -1176,7 +1210,6 @@ class Class1NeuralNetwork(object): layer_names) return result - def make_network( self, peptide_encoding, @@ -1197,7 +1230,7 @@ class Class1NeuralNetwork(object): num_outputs=1, allele_representations=None): """ - Helper function to make a keras network for class1 affinity prediction. + Helper function to make a keras network for class 1 affinity prediction. """ # We import keras here to avoid tensorflow debug output, etc. unless we @@ -1314,12 +1347,24 @@ class Class1NeuralNetwork(object): def set_allele_representations(self, allele_representations): """ + Set the allele representations in use by this model. This means mutating + the weights for the allele input embedding layer. + + Rationale: instead of passing in the allele sequence for each data point + during model training or prediction (which is expensive in terms of + memory usage), we pass in an allele index between 0 and n-1 where n is + the number of alleles in some universe of possible alleles. This index + is used in the model to lookup the corresponding allele sequence. This + function sets the lookup table. + + See also: AlleleEncoding.allele_representations() Parameters ---------- - model - allele_representations - + allele_representations : numpy.ndarray of shape (a, l, m) + where a is the total number of alleles, + l is the allele sequence length, + m is the length of the vectors used to represent amino acids """ from keras.models import model_from_json reshaped = allele_representations.reshape(