Skip to content
Snippets Groups Projects
test_class1_presentation_predictor.py 4.45 KiB
Newer Older
Tim O'Donnell's avatar
Tim O'Donnell committed
import logging
logging.getLogger('tensorflow').disabled = True
logging.getLogger('matplotlib').disabled = True

import pandas
Tim O'Donnell's avatar
Tim O'Donnell committed
import tempfile
Tim O'Donnell's avatar
Tim O'Donnell committed
import pickle
Tim O'Donnell's avatar
Tim O'Donnell committed
from numpy.testing import assert_, assert_equal, assert_allclose, assert_array_equal
Tim O'Donnell's avatar
Tim O'Donnell committed
from nose.tools import assert_greater, assert_less
import numpy
Tim O'Donnell's avatar
Tim O'Donnell committed

from sklearn.metrics import roc_auc_score

from mhcflurry import Class1AffinityPredictor, Class1ProcessingPredictor
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.class1_presentation_predictor import Class1PresentationPredictor
from mhcflurry.downloads import get_path
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.common import random_peptides
from mhcflurry.testing_utils import cleanup, startup
Tim O'Donnell's avatar
Tim O'Donnell committed
from mhcflurry.regression_target import to_ic50
Tim O'Donnell's avatar
Tim O'Donnell committed
import mhcflurry.class1_presentation_predictor
mhcflurry.class1_presentation_predictor.PREDICT_CHUNK_SIZE = 15
from . import data_path

Tim O'Donnell's avatar
Tim O'Donnell committed
AFFINITY_PREDICTOR = None
CLEAVAGE_PREDICTOR = None
CLEAVAGE_PREDICTOR_NO_FLANKING = None

Tim O'Donnell's avatar
Tim O'Donnell committed

def setup():
Tim O'Donnell's avatar
Tim O'Donnell committed
    global AFFINITY_PREDICTOR
    global CLEAVAGE_PREDICTOR
    global CLEAVAGE_PREDICTOR_NO_FLANKING
    startup()
Tim O'Donnell's avatar
Tim O'Donnell committed
    AFFINITY_PREDICTOR = Class1AffinityPredictor.load(
Tim O'Donnell's avatar
Tim O'Donnell committed
        get_path("models_class1_pan", "models.combined"),
Tim O'Donnell's avatar
Tim O'Donnell committed
        optimization_level=0,
        max_models=1)
    CLEAVAGE_PREDICTOR = Class1ProcessingPredictor.load(
        get_path("models_class1_processing", "models"), max_models=1)
    CLEAVAGE_PREDICTOR_NO_FLANKING = Class1ProcessingPredictor.load(
        get_path("models_class1_processing_variants", "models.selected.no_flank"),
        max_models=1)
Tim O'Donnell's avatar
Tim O'Donnell committed


def teardown():
Tim O'Donnell's avatar
Tim O'Donnell committed
    global AFFINITY_PREDICTOR
    global CLEAVAGE_PREDICTOR
    global CLEAVAGE_PREDICTOR_NO_FLANKING
Tim O'Donnell's avatar
Tim O'Donnell committed
    AFFINITY_PREDICTOR = None
    CLEAVAGE_PREDICTOR = None
    CLEAVAGE_PREDICTOR_NO_FLANKING = None
def test_basic():
    df = pandas.read_csv(data_path("multiallelic.benchmark.small.csv.bz2"))
    train_df = df.loc[
        df.sample_id.isin(sorted(df.sample_id.unique())[:3])
    ]
    test_df = df.loc[
        ~df.sample_id.isin(train_df.sample_id.unique())
    ]
    test_df = test_df.sample(frac=0.01, weights=test_df.hit + 0.01)

    experiment_to_alleles = (
        df.drop_duplicates("sample_id").set_index("sample_id").hla.str.split().to_dict())
Tim O'Donnell's avatar
Tim O'Donnell committed

    predictor = Class1PresentationPredictor(
        affinity_predictor=AFFINITY_PREDICTOR,
        processing_predictor_without_flanks=CLEAVAGE_PREDICTOR_NO_FLANKING,
        processing_predictor_with_flanks=CLEAVAGE_PREDICTOR)

    predictor.fit(
        targets=train_df.hit.values,
        peptides=train_df.peptide.values,
        experiment_names=train_df.sample_id.values,
        alleles=experiment_to_alleles,
        n_flanks=train_df.n_flank.values,
        c_flanks=train_df.c_flank.values,
        verbose=2)

    def add_prediction_cols(test_df, predictor):
        test_df["prediction1"] = predictor.predict(
            peptides=test_df.peptide.values,
            experiment_names=test_df.sample_id.values,
            alleles=experiment_to_alleles,
            n_flanks=test_df.n_flank.values,
            c_flanks=test_df.c_flank.values,
            verbose=2)

        test_df["prediction2"] = predictor.predict(
            peptides=test_df.peptide.values,
            experiment_names=test_df.sample_id.values,
            alleles=experiment_to_alleles,
            verbose=2)

    add_prediction_cols(test_df, predictor)

    score1 = roc_auc_score(test_df.hit.values, test_df.prediction1.values)
    score2 = roc_auc_score(test_df.hit.values, test_df.prediction2.values)

    print("AUC", score1, score2)

    assert_greater(score1, 0.8)
    assert_greater(score2, 0.8)

    # Test saving, loading, pickling
Tim O'Donnell's avatar
Tim O'Donnell committed
    models_dir = tempfile.mkdtemp("_models")
    print(models_dir)
    predictor.save(models_dir)
    predictor2 = Class1PresentationPredictor.load(models_dir)
Tim O'Donnell's avatar
Tim O'Donnell committed
    predictor3 = pickle.loads(
        pickle.dumps(predictor, protocol=pickle.HIGHEST_PROTOCOL))
    predictor4 = pickle.loads(
        pickle.dumps(predictor2, protocol=pickle.HIGHEST_PROTOCOL))
    for (i, other_predictor) in enumerate([predictor2, predictor3, predictor4]):
        print("Testing identity", i + 1)
        other_test_df = test_df.copy()
        del other_test_df["prediction1"]
        del other_test_df["prediction2"]
        add_prediction_cols(other_test_df, other_predictor)
        numpy.testing.assert_array_almost_equal(
            test_df["prediction1"], other_test_df["prediction1"], decimal=6)
        numpy.testing.assert_array_almost_equal(
            test_df["prediction2"], other_test_df["prediction2"], decimal=6)