Skip to content
Snippets Groups Projects
class1_affinity_predictor.py 41.6 KiB
Newer Older
Tim O'Donnell's avatar
Tim O'Donnell committed
        Parameters
        ----------
Tim O'Donnell's avatar
Tim O'Donnell committed
        filename : string
            Should end in ".npz".
Tim O'Donnell's avatar
Tim O'Donnell committed

Tim O'Donnell's avatar
Tim O'Donnell committed
        Returns
Tim O'Donnell's avatar
Tim O'Donnell committed
        ----------
        list of array
Tim O'Donnell's avatar
Tim O'Donnell committed
        """
Tim O'Donnell's avatar
Tim O'Donnell committed
        loaded = numpy.load(filename)
        weights = [
            loaded["array_%d" % i]
            for i in range(len(loaded.keys()))
        ]
        loaded.close()
        return weights

    def calibrate_percentile_ranks(
            self,
            peptides=None,
            num_peptides_per_length=int(1e5),
            alleles=None,
            bins=None):
        """
        Compute the cumulative distribution of ic50 values for a set of alleles
        over a large universe of random peptides, to enable computing quantiles
        in this distribution later.

        For every allele, predictions are made on the calibration peptides and
        a PercentRankTransform fit to those predictions is stored in
        self.allele_to_percent_rank_transform[allele], replacing any existing
        entry for that allele.

        Parameters
        ----------
        peptides : sequence of string or EncodableSequences, optional
            Peptides to use
        num_peptides_per_length : int, optional
            If peptides argument is not specified, then num_peptides_per_length
            peptides are randomly sampled from a uniform distribution for each
            supported length
        alleles : sequence of string, optional
            Alleles to perform calibration for. If not specified all supported
            alleles will be calibrated.
        bins : object, optional
            Anything that can be passed to numpy.histogram's "bins" argument
            can be used here, i.e. either an integer or a sequence giving bin
            edges. This is in ic50 space. If not specified, 1000 bin edges
            given by to_ic50(numpy.linspace(1, 0, 1000)) are used.

        Returns
        -------
        EncodableSequences : peptides used for calibration
        """
        if bins is None:
            bins = to_ic50(numpy.linspace(1, 0, 1000))

        if alleles is None:
            alleles = self.supported_alleles

        if peptides is None:
            # No peptides given: sample num_peptides_per_length random peptides
            # for each length from supported_peptide_lengths[0] through
            # supported_peptide_lengths[1], inclusive.
            peptides = []
            lengths = range(
                self.supported_peptide_lengths[0],
                self.supported_peptide_lengths[1] + 1)
            for length in lengths:
                peptides.extend(
                    random_peptides(num_peptides_per_length, length))

        # Wrap once so the same object is reused for every allele's
        # predict() call.
        encoded_peptides = EncodableSequences.create(peptides)

        for allele in alleles:
            predictions = self.predict(encoded_peptides, allele=allele)
            transform = PercentRankTransform()
            transform.fit(predictions, bins=bins)
            self.allele_to_percent_rank_transform[allele] = transform

        return encoded_peptides

    def filter_networks(self, predicate):
        """
        Build a new Class1AffinityPredictor that keeps only the neural
        networks of this predictor accepted by `predicate`.

        Every allele key of this predictor's allele-specific models is
        retained in the result, even when none of its networks pass the
        predicate (its list is then empty). The
        allele_to_fixed_length_sequence attribute is passed through unchanged.

        Parameters
        ----------
        predicate : Class1NeuralNetwork -> boolean
            Function specifying which neural networks to include

        Returns
        -------
        Class1AffinityPredictor
        """
        # Allele-specific networks are filtered first, then pan-allele ones,
        # so the predicate is invoked in the same order as a plain loop would.
        kept_allele_specific = {
            allele: [network for network in networks if predicate(network)]
            for (allele, networks)
            in self.allele_to_allele_specific_models.items()
        }
        kept_pan_allele = list(
            filter(predicate, self.class1_pan_allele_models))

        return Class1AffinityPredictor(
            allele_to_allele_specific_models=kept_allele_specific,
            class1_pan_allele_models=kept_pan_allele,
            allele_to_fixed_length_sequence=self.allele_to_fixed_length_sequence,
        )