class1_neural_network.py

import time
import collections
import logging
import json
import weakref
import itertools

import numpy
import pandas

from .hyperparameters import HyperparameterDefaults

from .encodable_sequences import EncodableSequences, EncodingError
from .regression_target import to_ic50, from_ic50
from .common import random_peptides, amino_acid_distribution
from .custom_loss import get_loss
from .data_dependent_weights_initialization import lsuv_init


class Class1NeuralNetwork(object):
    """
    Low level class I predictor consisting of a single neural network.
    
    Both single allele and pan-allele prediction are supported, but pan-allele
    is in development and not yet well performing.
    
    Users will generally use Class1AffinityPredictor, which gives a higher-level
    interface and supports ensembles.
    """

    network_hyperparameter_defaults = HyperparameterDefaults(
        allele_amino_acid_encoding="BLOSUM62",
        allele_dense_layer_sizes=[],
        peptide_encoding={
            'vector_encoding_name': 'BLOSUM62',
            'alignment_method': 'pad_middle',
            'left_edge': 4,
            'right_edge': 4,
            'max_length': 15,
        },
        peptide_dense_layer_sizes=[],
        peptide_allele_merge_method="multiply",
        peptide_allele_merge_activation="",
        layer_sizes=[32],
        dense_layer_l1_regularization=0.001,
        dense_layer_l2_regularization=0.0,
        activation="tanh",
        init="glorot_uniform",
        output_activation="sigmoid",
        dropout_probability=0.0,
        batch_normalization=False,
        locally_connected_layers=[
            {
                "filters": 8,
                "activation": "tanh",
                "kernel_size": 3
            }
        ],
        num_outputs=1,
    )
    """
    Hyperparameters (and their default values) that affect the neural network
    architecture.
    """

    compile_hyperparameter_defaults = HyperparameterDefaults(
        loss="custom:mse_with_inequalities",
        optimizer="rmsprop",
        learning_rate=None,
    )
    """
    Loss and optimizer hyperparameters. Any values supported by keras may be
    used.
    """

    fit_hyperparameter_defaults = HyperparameterDefaults(
        max_epochs=500,
        validation_split=0.1,
        early_stopping=True,
        minibatch_size=128,
        data_dependent_initialization_method=None,
        random_negative_rate=0.0,
        random_negative_constant=25,
        random_negative_affinity_min=20000.0,
        random_negative_affinity_max=50000.0,
        random_negative_match_distribution=True,
        random_negative_distribution_smoothing=0.0,
        random_negative_output_indices=None)
    """
    Hyperparameters for neural network training.
    """

    early_stopping_hyperparameter_defaults = HyperparameterDefaults(
        patience=20,
        min_delta=0.0,
    )
    """
    Hyperparameters for early stopping.
    """

    miscelaneous_hyperparameter_defaults = HyperparameterDefaults(
        train_data={},
    )
    """
    Miscelaneous hyperaparameters. These parameters are not used by this class
    but may be interpreted by other code.
    """

    hyperparameter_defaults = network_hyperparameter_defaults.extend(
        compile_hyperparameter_defaults).extend(
        fit_hyperparameter_defaults).extend(
        early_stopping_hyperparameter_defaults).extend(
        miscelaneous_hyperparameter_defaults
    )
    """
    Combined set of all supported hyperparameters and their default values.
    """

    # Hyperparameter renames.
    # These are updated from time to time as new versions are developed. It
    # provides a primitive way to allow new code to work with models trained
    # using older code.
    # None indicates the hyperparameter has been dropped.
    hyperparameter_renames = {
        "use_embedding": None,
        "pseudosequence_use_embedding": None,
        "monitor": None,
        "min_delta": None,
        "verbose": None,
        "mode": None,
        "take_best_epoch": None,
        'kmer_size': None,
        'peptide_amino_acid_encoding': None,
        'embedding_input_dim': None,
        'embedding_output_dim': None,
        'embedding_init_method': None,
        'left_edge': None,
        'right_edge': None,
    }

    @classmethod
    def apply_hyperparameter_renames(cls, hyperparameters):
        """
        Handle hyperparameter renames.

        Parameters
        ----------
        hyperparameters : dict

        Returns
        -------
        dict : updated hyperparameters

        """
        for (from_name, to_name) in cls.hyperparameter_renames.items():
            if from_name in hyperparameters:
                value = hyperparameters.pop(from_name)
                if to_name:
                    hyperparameters[to_name] = value
        return hyperparameters

    def __init__(self, **hyperparameters):
        self.hyperparameters = self.hyperparameter_defaults.with_defaults(
            self.apply_hyperparameter_renames(hyperparameters))

        self._network = None
        self.network_json = None
        self.network_weights = None
        self.network_weights_loader = None

        self.fit_info = []
        self.prediction_cache = weakref.WeakKeyDictionary()

    KERAS_MODELS_CACHE = {}
    """
    Process-wide keras model cache, a map from: architecture JSON string to
    (Keras model, existing network weights)
    """

    @classmethod
    def clear_model_cache(klass):
        """
        Clear the Keras model cache.
        """
        klass.KERAS_MODELS_CACHE.clear()

    @classmethod
    def borrow_cached_network(klass, network_json, network_weights):
        """
        Return a keras Model with the specified architecture and weights.
        As an optimization, when possible this will reuse architectures from a
        process-wide cache.

        The returned object is "borrowed" in the sense that its weights can
        change later after subsequent calls to this method from other objects.

        If you're using this from a parallel implementation you'll need to
        hold a lock while using the returned object.

        Parameters
        ----------
        network_json : string of JSON
        network_weights : list of numpy.array

        Returns
        -------
        keras.models.Model
        """
        assert network_weights is not None
        key = klass.keras_network_cache_key(network_json)
        if key not in klass.KERAS_MODELS_CACHE:
            # Cache miss.
            import keras.models
            network = keras.models.model_from_json(network_json)
            existing_weights = None
        else:
            # Cache hit.
            (network, existing_weights) = klass.KERAS_MODELS_CACHE[key]
        if existing_weights is not network_weights:
            network.set_weights(network_weights)
            klass.KERAS_MODELS_CACHE[key] = (network, network_weights)

        # As an added safety check we overwrite the fit method on the returned
        # model to throw an error if it is called.
        def throw(*args, **kwargs):
            raise NotImplementedError("Do not call fit on cached model.")

        network.fit = throw
        return network

    def network(self, borrow=False):
        """
        Return the keras model associated with this predictor.

        Parameters
        ----------
        borrow : bool
            Whether to return a cached model if possible. See
            borrow_cached_network for details

        Returns
        -------
        keras.models.Model
        """
        if self._network is None and self.network_json is not None:
            self.load_weights()
            if borrow:
                return self.borrow_cached_network(
                    self.network_json,
                    self.network_weights)
            else:
                import keras.models
                self._network = keras.models.model_from_json(self.network_json)
                if self.network_weights is not None:
                    self._network.set_weights(self.network_weights)
                self.network_json = None
                self.network_weights = None
        return self._network

    def update_network_description(self):
        if self._network is not None:
            self.network_json = self._network.to_json()
            self.network_weights = self._network.get_weights()

    @staticmethod
    def keras_network_cache_key(network_json):
        # As an optimization, we remove anything about regularization as these
        # do not affect predictions.
        def drop_properties(d):
            if 'kernel_regularizer' in d:
                del d['kernel_regularizer']
            return d

        description = json.loads(
            network_json,
            object_hook=drop_properties)
        return json.dumps(description)

    def get_config(self):
        """
        serialize to a dict all attributes except model weights
        
        Returns
        -------
        dict
        """
        self.update_network_description()
        result = dict(self.__dict__)
        result['_network'] = None
        result['network_weights'] = None
        result['network_weights_loader'] = None
        result['prediction_cache'] = None
        return result

    @classmethod
    def from_config(cls, config, weights=None, weights_loader=None):
        """
        deserialize from a dict returned by get_config().
        
        Parameters
        ----------
        config : dict
        weights : list of array, optional
            Network weights to restore
        weights_loader : callable, optional
            Function to call (no arguments) to load weights when needed

        Returns
        -------
        Class1NeuralNetwork
        """
        config = dict(config)
        instance = cls(**config.pop('hyperparameters'))
        instance.__dict__.update(config)
        instance.network_weights = weights
        instance.network_weights_loader = weights_loader
        instance.prediction_cache = weakref.WeakKeyDictionary()
        return instance

    def load_weights(self):
        """
        Load weights by evaluating self.network_weights_loader, if needed.

        After calling this, self.network_weights_loader will be None and
        self.network_weights will be the weights list, if available.
        """
        if self.network_weights_loader:
            self.network_weights = self.network_weights_loader()
            self.network_weights_loader = None

    def get_weights(self):
        """
        Get the network weights
        
        Returns
        -------
        list of numpy.array giving weights for each layer or None if there is no
        network
        """
        self.update_network_description()
        self.load_weights()
        return self.network_weights

    def __getstate__(self):
        """
        serialize to a dict. Model weights are included. For pickle support.
        
        Returns
        -------
        dict

        """
        self.update_network_description()
        self.load_weights()
        result = dict(self.__dict__)
        result['_network'] = None
        result['prediction_cache'] = None
        return result

    def __setstate__(self, state):
        """
        Deserialize. For pickle support.
        """
        self.__dict__.update(state)
        self.prediction_cache = weakref.WeakKeyDictionary()

    def peptides_to_network_input(self, peptides):
        """
        Encode peptides to the fixed-length encoding expected by the neural
        network (which depends on the architecture).
        
        Parameters
        ----------
        peptides : EncodableSequences or list of string

        Returns
        -------
        numpy.array
        """
        encoder = EncodableSequences.create(peptides)
        encoded = encoder.variable_length_to_fixed_length_vector_encoding(
            **self.hyperparameters['peptide_encoding'])
        assert len(encoded) == len(peptides)
        return encoded

    @property
    def supported_peptide_lengths(self):
        """
        (minimum, maximum) lengths of peptides supported, inclusive.
        
        Returns
        -------
        (int, int) tuple

        """
        # We currently have an arbitrary hard floor of 5, even if the underlying
        # peptide encoding supports smaller lengths.
        #
        # We empirically find the supported peptide lengths based on the
        # lengths for which peptides_to_network_input throws ValueError.
        try:
            self.peptides_to_network_input([""])
        except EncodingError as e:
            return e.supported_peptide_lengths
        raise RuntimeError("peptides_to_network_input did not raise")

    def allele_encoding_to_network_input(self, allele_encoding):
        """
        Encode alleles to the fixed-length encoding expected by the neural
        network (which depends on the architecture).

        Parameters
        ----------
        allele_encoding : AlleleEncoding

        Returns
        -------
        (numpy.array, numpy.array)
        """
        return (
            allele_encoding.indices,
            allele_encoding.allele_representations(
                self.hyperparameters['allele_amino_acid_encoding']))

    @staticmethod
    def data_dependent_weights_initialization(
            network,
            x_dict=None,
            method="lsuv",
            verbose=1):
        """
        Data dependent weights initialization.

        Parameters
        ----------
        method

        Returns
        -------

        """
        if verbose:
            print("Performing data-dependent init: ", method)
        if method == "lsuv":
            assert x_dict is not None, "Data required for LSUV init"
            lsuv_init(network, x_dict, verbose=verbose > 0)
        else:
            raise RuntimeError("Unsupported init method: ", method)

    def fit_generator(
            self,
            generator,
            validation_peptide_encoding,
            validation_affinities,
            validation_allele_encoding=None,
            validation_inequalities=None,
            validation_output_indices=None,
            steps_per_epoch=10,
            epochs=1000,
            min_epochs=0,
            patience=10,
            min_delta=0.0,
            verbose=1,
            progress_callback=None,
            progress_preamble="",
            progress_print_interval=5.0):
        """
        Fit using a generator. Does not support many of the features of fit(),
        such as random negative peptides.

        Parameters
        ----------
        generator : generator yielding (alleles, peptides, affinities) tuples
            where alleles and peptides are lists of strings, and affinities
            is list of floats.

        validation_peptide_encoding
        validation_affinities
        validation_allele_encoding
        validation_inequalities
        validation_output_indices
        steps_per_epoch
        epochs
        patience
        verbose

        Returns
        -------

        """
        import keras
        from keras import backend as K

        fit_info = collections.defaultdict(list)

        loss = get_loss(self.hyperparameters['loss'])

        (validation_allele_input, allele_representations) = (
            self.allele_encoding_to_network_input(validation_allele_encoding))

        if self.network() is None:
            self._network = self.make_network(
                allele_representations=allele_representations,
                **self.network_hyperparameter_defaults.subselect(
                    self.hyperparameters))
            if verbose > 0:
                self.network().summary()
        network = self.network()

        network.compile(
            loss=loss.loss, optimizer=self.hyperparameters['optimizer'])
        network._make_predict_function()
        self.set_allele_representations(allele_representations)

        if self.hyperparameters['learning_rate'] is not None:
            K.set_value(
                self.network().optimizer.lr,
                self.hyperparameters['learning_rate'])
        fit_info["learning_rate"] = float(
            K.get_value(self.network().optimizer.lr))

        validation_x_dict = {
            'peptide': self.peptides_to_network_input(
                validation_peptide_encoding),
            'allele': validation_allele_input,
        }
        encode_y_kwargs = {}
        if validation_inequalities is not None:
            encode_y_kwargs["inequalities"] = validation_inequalities
        if validation_output_indices is not None:
            encode_y_kwargs["output_indices"] = validation_output_indices

        output = loss.encode_y(
            from_ic50(validation_affinities), **encode_y_kwargs)

        validation_y_dict = {
            'output': output,
        }

        mutable_generator_state = {
            'yielded_values': 0  # total number of data points yielded
        }

        def wrapped_generator():
            for (alleles, peptides, affinities) in generator:
                (allele_encoding_input, _) = (
                    self.allele_encoding_to_network_input(alleles))
                x_dict = {
                    'peptide': self.peptides_to_network_input(peptides),
                    'allele': allele_encoding_input,
                }
                y_dict = {
                    'output': from_ic50(affinities)
                }
                yield (x_dict, y_dict)
                mutable_generator_state['yielded_values'] += len(affinities)

        start = time.time()

        iterator = wrapped_generator()

        # Initialization required if a data_dependent_initialization_method
        # is set and this is our first time fitting (i.e. fit_info is empty).
        data_dependent_init = self.hyperparameters[
            'data_dependent_initialization_method'
        ]
        if data_dependent_init and not self.fit_info:
            first_chunk = next(iterator)
            self.data_dependent_weights_initialization(
                network,
                first_chunk[0],  # x_dict
                method=data_dependent_init,
                verbose=verbose)
            iterator = itertools.chain([first_chunk], iterator)

        min_val_loss_iteration = None
        min_val_loss = None
        last_progress_print = 0
        epoch = 1
        while True:
            epoch_start_time = time.time()
            fit_history = network.fit_generator(
                iterator,
                steps_per_epoch=steps_per_epoch,
                initial_epoch=epoch - 1,
                epochs=epoch,
                use_multiprocessing=False,
                workers=1,
                validation_data=(validation_x_dict, validation_y_dict),
                verbose=verbose,
            )
            epoch_time = time.time() - epoch_start_time
            for (key, value) in fit_history.history.items():
                fit_info[key].extend(value)
            val_loss = fit_info['val_loss'][-1]

            if min_val_loss is None or val_loss < min_val_loss - min_delta:
                min_val_loss = val_loss
                min_val_loss_iteration = epoch

            patience_epoch_threshold = min(
                epochs, max(min_val_loss_iteration + patience, min_epochs))

            progress_message = (
                "epoch %3d/%3d [%0.2f sec.]: loss=%g val_loss=%g. Min val "
                "loss %g at epoch %s. Cum. points: %d. Stop at epoch %d." % (
                    epoch,
                    epochs,
                    epoch_time,
                    fit_info['loss'][-1],
                    val_loss,
                    min_val_loss,
                    min_val_loss_iteration,
                    mutable_generator_state['yielded_values'],
                    patience_epoch_threshold,
                )).strip()

            # Print progress no more often than once every few seconds.
            if progress_print_interval is not None and (
                    time.time() - last_progress_print > progress_print_interval):
                print(progress_preamble, progress_message)
                last_progress_print = time.time()

            if progress_callback:
                progress_callback()

            if epoch >= patience_epoch_threshold:
                if progress_print_interval is not None:
                    print(progress_preamble, "STOPPING", progress_message)
                    break
            epoch += 1

        fit_info["time"] = time.time() - start
        fit_info["num_points"] = mutable_generator_state["yielded_values"]
        self.fit_info.append(dict(fit_info))

    def fit(
            self,
            peptides,
            affinities,
            allele_encoding=None,
            inequalities=None,
            output_indices=None,
            sample_weights=None,
            shuffle_permutation=None,
            verbose=1,
            progress_callback=None,
            progress_preamble="",
            progress_print_interval=5.0):
        """
        Fit the neural network.
        
        Parameters
        ----------
        peptides : EncodableSequences or list of string
        
        affinities : list of float
            nM affinities. Must be same length of as peptides.
        
        allele_encoding : AlleleEncoding, optional
            If not specified, the model will be a single-allele predictor.

        inequalities : list of string, each element one of ">", "<", or "=".
            Inequalities to use for fitting. Same length as affinities.
            Each element must be one of ">", "<", or "=". For example, a ">"
            will train on y_pred > y_true for that element in the training set.
            Requires using a custom losses that support inequalities (e.g.
            mse_with_ineqalities).
            If None all inequalities are taken to be "=".
            
        sample_weights : list of float, optional
            If not specified, all samples (including random negatives added
            during training) will have equal weight. If specified, the random
            negatives will be assigned weight=1.0.

        shuffle_permutation : list of int, optional
            Permutation (integer list) of same length as peptides and affinities
            If None, then a random permutation will be generated.

        verbose : int
            Keras verbosity level

        progress_preamble : string
            Optional string of information to include in each progress update

        progress_print_interval : float
            How often (in seconds) to print progress update. Set to None to
            disable.
        """
        from keras import backend as K
        encodable_peptides = EncodableSequences.create(peptides)
        peptide_encoding = self.peptides_to_network_input(encodable_peptides)
        fit_info = collections.defaultdict(list)

        length_counts = (
            pandas.Series(encodable_peptides.sequences)
            .str.len().value_counts().to_dict())

        num_random_negative = {}
        for length in range(8, 16):
            num_random_negative[length] = int(
                length_counts.get(length, 0) *
                self.hyperparameters['random_negative_rate'] +
                self.hyperparameters['random_negative_constant'])
        num_random_negative = pandas.Series(num_random_negative)
        logging.info("Random negative counts per length:\n%s" % (
            str(num_random_negative.to_dict())))

        aa_distribution = None
        if self.hyperparameters['random_negative_match_distribution']:
            aa_distribution = amino_acid_distribution(
                encodable_peptides.sequences,
                smoothing=self.hyperparameters[
                    'random_negative_distribution_smoothing'])
            logging.info(
                "Using amino acid distribution for random negative:\n%s" % (
                str(aa_distribution.to_dict())))

        y_values = from_ic50(numpy.array(affinities, copy=False))
        assert numpy.isnan(y_values).sum() == 0, y_values
        if inequalities is not None:
            # Reverse inequalities because from_ic50() flips the direction
            # (i.e. lower affinity results in higher y values).
            adjusted_inequalities = pandas.Series(inequalities).map({
                "=": "=",
                ">": "<",
                "<": ">",
            }).values
        else:
            adjusted_inequalities = numpy.tile("=", len(y_values))
        if len(adjusted_inequalities) != len(y_values):
            raise ValueError("Inequalities and y_values must have same length")

        x_dict_without_random_negatives = {
            'peptide': peptide_encoding,
        }
        allele_representations = None
        if allele_encoding is not None:
            (allele_encoding_input, allele_representations) = (
                self.allele_encoding_to_network_input(allele_encoding))
            x_dict_without_random_negatives['allele'] = allele_encoding_input

        # Shuffle y_values and the contents of x_dict_without_random_negatives
        # This ensures different data is used for the test set for early stopping
        # when multiple models are trained.
        if shuffle_permutation is None:
            shuffle_permutation = numpy.random.permutation(len(y_values))
        y_values = y_values[shuffle_permutation]
        assert numpy.isnan(y_values).sum() == 0, y_values
        peptide_encoding = peptide_encoding[shuffle_permutation]
        adjusted_inequalities = adjusted_inequalities[shuffle_permutation]
        for key in x_dict_without_random_negatives:
            x_dict_without_random_negatives[key] = (
                x_dict_without_random_negatives[key][shuffle_permutation])
        if sample_weights is not None:
            sample_weights = numpy.array(sample_weights, copy=False)[
                shuffle_permutation
            ]
        if output_indices is not None:
            output_indices = numpy.array(output_indices, copy=False)[
                shuffle_permutation
            ]

        loss = get_loss(self.hyperparameters['loss'])

        if not loss.supports_inequalities and (
                any(inequality != "=" for inequality in adjusted_inequalities)):
            raise ValueError("Loss %s does not support inequalities" % loss)

        if (not loss.supports_multiple_outputs and output_indices is not None
                and (output_indices != 0).any()):
            raise ValueError("Loss %s does not support multiple outputs" % loss)

        if self.hyperparameters['num_outputs'] != 1:
            if output_indices is None:
                raise ValueError(
                    "Must supply output_indices for multi-output predictor")

        if self.network() is None:
            self._network = self.make_network(
                allele_representations=allele_representations,
                **self.network_hyperparameter_defaults.subselect(
                    self.hyperparameters))
            if verbose > 0:
                self.network().summary()

        if allele_representations is not None:
            self.set_allele_representations(allele_representations)

        self.network().compile(
            loss=loss.loss, optimizer=self.hyperparameters['optimizer'])

        if self.hyperparameters['learning_rate'] is not None:
            K.set_value(
                self.network().optimizer.lr,
                self.hyperparameters['learning_rate'])
        fit_info["learning_rate"] = float(
            K.get_value(self.network().optimizer.lr))

        if loss.supports_inequalities:
            # Do not sample negative affinities: just use an inequality.
            random_negative_ic50 = self.hyperparameters['random_negative_affinity_min']
            random_negative_target = from_ic50(random_negative_ic50)

            y_dict_with_random_negatives = {
                "output": numpy.concatenate([
                    numpy.tile(
                        random_negative_target, int(num_random_negative.sum())),
                    y_values,
                ]),
            }
            # Note: we are using "<" here not ">" because the inequalities are
            # now in target-space (0-1) not affinity-space.
            adjusted_inequalities_with_random_negatives = (
                ["<"] * int(num_random_negative.sum()) +
                list(adjusted_inequalities))
        else:
            # Randomly sample random negative affinities
            y_dict_with_random_negatives = {
                "output": numpy.concatenate([
                    from_ic50(
                        numpy.random.uniform(
                            self.hyperparameters[
                                'random_negative_affinity_min'],
                            self.hyperparameters[
                                'random_negative_affinity_max'],
                            int(num_random_negative.sum()))),
                    y_values,
                ]),
            }
            adjusted_inequalities_with_random_negatives = None
        assert numpy.isnan(y_dict_with_random_negatives['output']).sum() == 0, (
            y_dict_with_random_negatives)
        if sample_weights is not None:
            sample_weights_with_random_negatives = numpy.concatenate([
                numpy.ones(int(num_random_negative.sum())),
                sample_weights])
        else:
            sample_weights_with_random_negatives = None

        if output_indices is not None:
            random_negative_output_indices = (
                self.hyperparameters['random_negative_output_indices']
                if self.hyperparameters['random_negative_output_indices']
                else list(range(0, self.hyperparameters['num_outputs'])))
            output_indices_with_random_negatives = numpy.concatenate([
                pandas.Series(random_negative_output_indices, dtype=int).sample(
                    n=int(num_random_negative.sum()), replace=True).values,
                output_indices
            ])
        else:
            output_indices_with_random_negatives = None

        encode_y_kwargs = {}
        if adjusted_inequalities_with_random_negatives is not None:
            encode_y_kwargs["inequalities"] = (
                adjusted_inequalities_with_random_negatives)
        if output_indices_with_random_negatives is not None:
            encode_y_kwargs["output_indices"] = (
                output_indices_with_random_negatives)

        y_dict_with_random_negatives['output'] = loss.encode_y(
            y_dict_with_random_negatives['output'],
            **encode_y_kwargs)

        min_val_loss_iteration = None
        min_val_loss = None

        # Initialization required if a data_dependent_initialization_method
        # is set and this is our first time fitting (i.e. fit_info is empty).
        needs_initialization = self.hyperparameters[
            'data_dependent_initialization_method'
        ] is not None and not self.fit_info

        start = time.time()
        last_progress_print = None
        x_dict_with_random_negatives = {}
        for i in range(self.hyperparameters['max_epochs']):
            random_negative_peptides_list = []
            for (length, count) in num_random_negative.iteritems():
                random_negative_peptides_list.extend(
                    random_peptides(
                        count,
                        length=length,
                        distribution=aa_distribution))
            random_negative_peptides = EncodableSequences.create(
                random_negative_peptides_list)
            random_negative_peptides_encoding = (
                self.peptides_to_network_input(random_negative_peptides))

            if not x_dict_with_random_negatives:
                if len(random_negative_peptides) > 0:
                    x_dict_with_random_negatives["peptide"] = numpy.concatenate([
                        random_negative_peptides_encoding,
                        peptide_encoding,
                    ])
                    if 'allele' in x_dict_without_random_negatives:
                        x_dict_with_random_negatives['allele'] = numpy.concatenate([
                            x_dict_without_random_negatives['allele'][
                                numpy.random.choice(
                                    x_dict_without_random_negatives[
                                        'allele'].shape[0],
                                    size=len(random_negative_peptides_list))],
                            x_dict_without_random_negatives['allele']
                        ])
                else:
                    x_dict_with_random_negatives = (
                        x_dict_without_random_negatives)
            else:
                # Update x_dict_with_random_negatives in place.
                # This is more memory efficient than recreating it as above.
                if len(random_negative_peptides) > 0:
                    x_dict_with_random_negatives["peptide"][:len(random_negative_peptides)] = (
                        random_negative_peptides_encoding
                    )
                    if 'allele' in x_dict_with_random_negatives:
                        x_dict_with_random_negatives['allele'][:len(random_negative_peptides)] = (
                            x_dict_with_random_negatives['allele'][
                                len(random_negative_peptides) + numpy.random.choice(
                                    x_dict_with_random_negatives['allele'].shape[0] -
                                    len(random_negative_peptides),
                                    size=len(random_negative_peptides))
                            ]
                        )

            if needs_initialization:
                self.data_dependent_weights_initialization(
                    self.network(),
                    x_dict_with_random_negatives,
                    method=self.hyperparameters[
                        'data_dependent_initialization_method'],
                    verbose=verbose)
                needs_initialization = False

            epoch_start = time.time()
            fit_history = self.network().fit(
                x_dict_with_random_negatives,
                y_dict_with_random_negatives,
                shuffle=True,
                batch_size=self.hyperparameters['minibatch_size'],
                verbose=verbose,
                epochs=i + 1,
                initial_epoch=i,
                validation_split=self.hyperparameters['validation_split'],
                sample_weight=sample_weights_with_random_negatives)
            epoch_time = time.time() - epoch_start

            for (key, value) in fit_history.history.items():
                fit_info[key].extend(value)

            # Print progress no more often than once every few seconds.
            if progress_print_interval is not None and (
                    not last_progress_print or (
                        time.time() - last_progress_print
                        > progress_print_interval)):
                print((progress_preamble + " " +
                       "Epoch %3d / %3d [%0.2f sec]: loss=%g. "
                       "Min val loss (%s) at epoch %s" % (
                           i,
                           self.hyperparameters['max_epochs'],
                           epoch_time,
                           fit_info['loss'][-1],
                           str(min_val_loss),
                           min_val_loss_iteration)).strip())
                last_progress_print = time.time()

            if self.hyperparameters['validation_split']:
                val_loss = fit_info['val_loss'][-1]

                if min_val_loss is None or (
                        val_loss < min_val_loss - self.hyperparameters['min_delta']):
                    min_val_loss = val_loss
                    min_val_loss_iteration = i

                if self.hyperparameters['early_stopping']:
                    threshold = (
                        min_val_loss_iteration +
                        self.hyperparameters['patience'])
                    if i > threshold:
                        if progress_print_interval is not None:
                            print((progress_preamble + " " +
                                "Stopping at epoch %3d / %3d: loss=%g. "
                                "Min val loss (%g) at epoch %s" % (
                                    i,
                                    self.hyperparameters['max_epochs'],
                                    fit_info['loss'][-1],
                                    (
                                        min_val_loss if min_val_loss is not None
                                        else numpy.nan),
                                    min_val_loss_iteration)).strip())
                        break

            if progress_callback:
                progress_callback()

        fit_info["time"] = time.time() - start
        fit_info["num_points"] = len(peptides)
        self.fit_info.append(dict(fit_info))

    def predict(