        if include_percentile_ranks:
            if self.allele_to_percent_rank_transform:
                df["prediction_percentile"] = self.percentile_ranks(
                    df.prediction,
                    alleles=df.normalized_allele.values,
                    throw=throw)
            else:
                warnings.warn("No percentile rank information available.")

    @staticmethod
    def save_weights(weights_list, filename):
        """
        Save the model weights to the given filename using numpy's ".npz"
        format.

        Each array is stored under the key "array_<i>", where <i> is the
        array's position in `weights_list`, so that `load_weights` can
        restore the arrays in their original order.

        Parameters
        ----------
        weights_list : list of array

        filename : string
            Should end in ".npz".
        """
        numpy.savez(
            filename,
            **{"array_%d" % i: w for (i, w) in enumerate(weights_list)})

    @staticmethod
    def load_weights(filename):
        """
        Restore model weights from the given filename, which should have been
        created with `save_weights`.

        The arrays are read back from the keys "array_0", "array_1", ... in
        index order, one per entry stored in the archive.

        Parameters
        ----------
        filename : string
            Should end in ".npz".

        Returns
        -------
        list of array
        """
        # Use the archive as a context manager so the underlying file is
        # closed even if one of the expected "array_<i>" keys is missing.
        with numpy.load(filename) as loaded:
            return [
                loaded["array_%d" % i]
                for i in range(len(loaded.files))
            ]

    def calibrate_percentile_ranks(
            self,
            peptides=None,
            num_peptides_per_length=int(1e5),
            alleles=None,
            bins=None):
        """
        Compute the cumulative distribution of ic50 values for a set of alleles
        over a large universe of random peptides, to enable computing quantiles in
        this distribution later.

        A fitted PercentRankTransform is stored for each allele in
        `self.allele_to_percent_rank_transform`.

        Parameters
        ----------
        peptides : sequence of string or EncodableSequences, optional
            Peptides to use
        num_peptides_per_length : int, optional
            If peptides argument is not specified, then num_peptides_per_length
            peptides are randomly sampled from a uniform distribution for each
            supported length
        alleles : sequence of string, optional
            Alleles to perform calibration for. If not specified all supported
            alleles will be calibrated.
        bins : object
            Anything that can be passed to numpy.histogram's "bins" argument
            can be used here, i.e. either an integer or a sequence giving bin
            edges. This is in ic50 space. If not specified, 1000 edges given
            by to_ic50(numpy.linspace(1, 0, 1000)) are used.

        Returns
        -------
        EncodableSequences : peptides used for calibration
        """
        if bins is None:
            bins = to_ic50(numpy.linspace(1, 0, 1000))

        if alleles is None:
            alleles = self.supported_alleles

        if peptides is None:
            # supported_peptide_lengths is read as a (min, max) pair; the
            # upper bound is inclusive, hence the + 1.
            peptides = []
            lengths = range(
                self.supported_peptide_lengths[0],
                self.supported_peptide_lengths[1] + 1)
            for length in lengths:
                peptides.extend(
                    random_peptides(num_peptides_per_length, length))

        encoded_peptides = EncodableSequences.create(peptides)

        for allele in alleles:
            predictions = self.predict(encoded_peptides, allele=allele)
            transform = PercentRankTransform()
            transform.fit(predictions, bins=bins)
            self.allele_to_percent_rank_transform[allele] = transform

        # Hand back the peptides actually used, as the docstring promises.
        return encoded_peptides

    def filter_networks(self, predicate):
        """
        Build a new Class1AffinityPredictor that keeps only the neural
        networks of this predictor accepted by `predicate`.

        Every allele key of the allele-specific models is retained, even if
        all of its networks are rejected (its list is then empty).

        Parameters
        ----------
        predicate : Class1NeuralNetwork -> boolean
            Called once per network; truthy results mark networks to keep

        Returns
        -------
        Class1AffinityPredictor
        """
        kept_allele_specific = {
            allele: [network for network in networks if predicate(network)]
            for (allele, networks)
            in self.allele_to_allele_specific_models.items()
        }
        kept_pan_allele = list(
            filter(predicate, self.class1_pan_allele_models))

        return Class1AffinityPredictor(
            allele_to_allele_specific_models=kept_allele_specific,
            class1_pan_allele_models=kept_pan_allele,
            allele_to_fixed_length_sequence=self.allele_to_fixed_length_sequence,
        )