Skip to content
Snippets Groups Projects
test_antigen_presentation.py 5.87 KiB
Newer Older
Tim O'Donnell's avatar
Tim O'Donnell committed
import pickle

from nose.tools import eq_, assert_less

import numpy
Tim O'Donnell's avatar
Tim O'Donnell committed
from numpy.testing import assert_allclose, assert_array_equal
import pandas
from mhcflurry import amino_acid
from mhcflurry.antigen_presentation import (
    decoy_strategies,
Tim O'Donnell's avatar
Tim O'Donnell committed
    percent_rank_transform,
    presentation_component_models,
    presentation_model)
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.amino_acid import common_amino_acid_letters

######################
# Helper functions

def make_random_peptides(num, length=9):
    return [
        ''.join(peptide_sequence)
        for peptide_sequence in
        numpy.random.choice(
            amino_acid.common_amino_acid_letters, size=(num, length))
    ]


def hit_criterion(experiment_name, peptide):
    # Peptides with 'A' are always hits. Easy for model to learn.
    return 'A' in peptide


######################
# Small test dataset

Tim O'Donnell's avatar
Tim O'Donnell committed
PEPTIDES = make_random_peptides(1000, 9)
OTHER_PEPTIDES = make_random_peptides(1000, 9)

TRANSCRIPTS = [
    "transcript-%d" % i
    for i in range(1, 10)
]

EXPERIMENT_TO_ALLELES = {
    'exp1': ['HLA-A*01:01'],
    'exp2': ['HLA-A*02:01', 'HLA-B*51:01'],
}

EXPERIMENT_TO_EXPRESSION_GROUP = {
    'exp1': 'group1',
    'exp2': 'group2',
}

EXPERESSION_GROUPS = sorted(set(EXPERIMENT_TO_EXPRESSION_GROUP.values()))

TRANSCIPTS_DF = pandas.DataFrame(index=PEPTIDES, columns=EXPERESSION_GROUPS)
TRANSCIPTS_DF[:] = numpy.random.choice(TRANSCRIPTS, size=TRANSCIPTS_DF.shape)

PEPTIDES_AND_TRANSCRIPTS_DF = TRANSCIPTS_DF.stack().to_frame().reset_index()
PEPTIDES_AND_TRANSCRIPTS_DF.columns = ["peptide", "group", "transcript"]
del PEPTIDES_AND_TRANSCRIPTS_DF["group"]

PEPTIDES_DF = pandas.DataFrame({"peptide": PEPTIDES})
PEPTIDES_DF["experiment_name"] = "exp1"
PEPTIDES_DF["hit"] = [
    hit_criterion(row.experiment_name, row.peptide)
    for _, row in
    PEPTIDES_DF.iterrows()
]
Tim O'Donnell's avatar
Tim O'Donnell committed
print("Hit rate: %0.3f" % PEPTIDES_DF.hit.mean())
Tim O'Donnell's avatar
Tim O'Donnell committed
AA_COMPOSITION_DF = pandas.DataFrame({
    'peptide': sorted(set(PEPTIDES).union(set(OTHER_PEPTIDES))),
})
for aa in sorted(common_amino_acid_letters):
    AA_COMPOSITION_DF[aa] = AA_COMPOSITION_DF.peptide.str.count(aa)

AA_COMPOSITION_DF.index = AA_COMPOSITION_DF.peptide
del AA_COMPOSITION_DF['peptide']

HITS_DF = PEPTIDES_DF.ix[PEPTIDES_DF.hit].reset_index().copy()
del HITS_DF["hit"]

######################
# Tests

Tim O'Donnell's avatar
Tim O'Donnell committed
def test_percent_rank_transform():
    model = percent_rank_transform.PercentRankTransform()
    model.fit(numpy.arange(1000))
Tim O'Donnell's avatar
Tim O'Donnell committed
    assert_allclose(
Tim O'Donnell's avatar
Tim O'Donnell committed
        model.transform([-2, 0, 50, 100, 2000]),
Tim O'Donnell's avatar
Tim O'Donnell committed
        [0.0, 0.0, 5.0, 10.0, 100.0],
        err_msg=str(model.__dict__))
Tim O'Donnell's avatar
Tim O'Donnell committed


def test_mhcflurry_trained_on_hits():
    mhcflurry_model = presentation_component_models.MHCflurryTrainedOnHits(
        "basic",
        experiment_to_alleles=EXPERIMENT_TO_ALLELES,
        experiment_to_expression_group=EXPERIMENT_TO_EXPRESSION_GROUP,
        transcripts=TRANSCIPTS_DF,
        peptides_and_transcripts=PEPTIDES_AND_TRANSCRIPTS_DF,
Tim O'Donnell's avatar
Tim O'Donnell committed
        random_peptides_for_percent_rank=OTHER_PEPTIDES,
    mhcflurry_model.fit(HITS_DF)
    predictions = mhcflurry_model.predict(peptides)
    peptides["affinity"] = predictions["mhcflurry_basic_affinity"]
    peptides["percent_rank"] = predictions["mhcflurry_basic_percentile_rank"]
    assert_less(
        peptides.affinity[peptides.hit].mean(),
        peptides.affinity[~peptides.hit].mean())
    assert_less(
        peptides.percent_rank[peptides.hit].mean(),
        peptides.percent_rank[~peptides.hit].mean())


def test_presentation_model():
    mhcflurry_model = presentation_component_models.MHCflurryTrainedOnHits(
        "basic",
        experiment_to_alleles=EXPERIMENT_TO_ALLELES,
        experiment_to_expression_group=EXPERIMENT_TO_EXPRESSION_GROUP,
        transcripts=TRANSCIPTS_DF,
        peptides_and_transcripts=PEPTIDES_AND_TRANSCRIPTS_DF,
Tim O'Donnell's avatar
Tim O'Donnell committed
        random_peptides_for_percent_rank=OTHER_PEPTIDES,
Tim O'Donnell's avatar
Tim O'Donnell committed
    aa_content_model = (
        presentation_component_models.FixedPerPeptideQuantity(
            "aa composition",
            numpy.log1p(AA_COMPOSITION_DF)))

    decoys = decoy_strategies.UniformRandom(
Tim O'Donnell's avatar
Tim O'Donnell committed
        OTHER_PEPTIDES,
        decoys_per_hit=50)

    terms = {
        'A_ms': (
            [mhcflurry_model],
            ["log1p(mhcflurry_basic_affinity)"]),
Tim O'Donnell's avatar
Tim O'Donnell committed
        'P': (
            [aa_content_model],
            list(AA_COMPOSITION_DF.columns)),
Tim O'Donnell's avatar
Tim O'Donnell committed
    for kwargs in [{}, {'ensemble_size': 3}]:
Tim O'Donnell's avatar
Tim O'Donnell committed
        models = presentation_model.build_presentation_models(
            terms,
Tim O'Donnell's avatar
Tim O'Donnell committed
            ["A_ms", "A_ms + P"],
Tim O'Donnell's avatar
Tim O'Donnell committed
            decoy_strategy=decoys,
            **kwargs)
Tim O'Donnell's avatar
Tim O'Donnell committed
        eq_(len(models), 2)
Tim O'Donnell's avatar
Tim O'Donnell committed

Tim O'Donnell's avatar
Tim O'Donnell committed
        unfit_model = models["A_ms"]
        model = unfit_model.clone()
Tim O'Donnell's avatar
Tim O'Donnell committed
        model.fit(HITS_DF.ix[HITS_DF.experiment_name == "exp1"])

        peptides = PEPTIDES_DF.copy()
        peptides["prediction"] = model.predict(peptides)
Tim O'Donnell's avatar
Tim O'Donnell committed
        print(peptides)
        print("Hit mean", peptides.prediction[peptides.hit].mean())
        print("Decoy mean", peptides.prediction[~peptides.hit].mean())

Tim O'Donnell's avatar
Tim O'Donnell committed
        assert_less(
            peptides.prediction[~peptides.hit].mean(),
            peptides.prediction[peptides.hit].mean())
Tim O'Donnell's avatar
Tim O'Donnell committed

        model2 = pickle.loads(pickle.dumps(model))
        assert_array_equal(
            model.predict(peptides), model2.predict(peptides))

        model3 = unfit_model.clone()
        assert not model3.has_been_fit
        model3.restore_fit(model2.get_fit())
        assert_array_equal(
            model.predict(peptides), model3.predict(peptides))

        better_unfit_model = models["A_ms + P"]
        model = better_unfit_model.clone()
        model.fit(HITS_DF.ix[HITS_DF.experiment_name == "exp1"])
        peptides["prediction_better"] = model.predict(peptides)

        assert_less(
            peptides.prediction_better[~peptides.hit].mean(),
            peptides.prediction[~peptides.hit].mean())
        assert_less(
            peptides.prediction[peptides.hit].mean(),
            peptides.prediction_better[peptides.hit].mean())