"""
Tests for training and predicting using Class1 pan-allele models.
"""

import logging
# Disable the loggers of these third-party libraries so their output does
# not clutter the test logs.
for _noisy_logger in ('tensorflow', 'matplotlib'):
    logging.getLogger(_noisy_logger).disabled = True
del _noisy_logger


import json
import os
import shutil
import tempfile
import subprocess

import pandas

from numpy.testing import assert_equal, assert_array_less

from mhcflurry import Class1AffinityPredictor,Class1NeuralNetwork
from mhcflurry.downloads import get_path

from mhcflurry.testing_utils import cleanup, startup
# Expose the shared mhcflurry test helpers under the module-level names
# `setup` / `teardown` (presumably picked up by the test runner as
# module-level fixtures -- confirm against the runner in use).
teardown = cleanup
setup = startup

# An empty CUDA_VISIBLE_DEVICES hides all CUDA devices from this process.
# Subprocesses started without an explicit environment inherit the setting.
os.environ["CUDA_VISIBLE_DEVICES"] = ""


HYPERPARAMETERS_LIST = [
{
    'activation': 'tanh',
    'allele_dense_layer_sizes': [],
    'batch_normalization': False,
    'dense_layer_l1_regularization': 0.0,
    'dense_layer_l2_regularization': 0.0,
    'dropout_probability': 0.5,
    'early_stopping': True,
    'init': 'glorot_uniform',
    'layer_sizes': [64],
    'learning_rate': None,
    'locally_connected_layers': [],
    'loss': 'custom:mse_with_inequalities',
Tim O'Donnell's avatar
Tim O'Donnell committed
    'max_epochs': 0,  # never selected
Tim O'Donnell's avatar
Tim O'Donnell committed
    'minibatch_size': 256,
    'optimizer': 'rmsprop',
    'output_activation': 'sigmoid',
    'patience': 10,
    'peptide_allele_merge_activation': '',
    'peptide_allele_merge_method': 'concatenate',
    'peptide_amino_acid_encoding': 'BLOSUM62',
    'peptide_dense_layer_sizes': [],
    'peptide_encoding': {
        'alignment_method': 'left_pad_centered_right_pad',
        'max_length': 15,
        'vector_encoding_name': 'BLOSUM62',
    },
    'random_negative_affinity_max': 50000.0,
    'random_negative_affinity_min': 20000.0,
    'random_negative_constant': 25,
    'random_negative_distribution_smoothing': 0.0,
    'random_negative_match_distribution': True,
    'random_negative_rate': 0.2,
Tim O'Donnell's avatar
Tim O'Donnell committed
    'train_data': {"pretrain": False},
    'validation_split': 0.1,
},
{
    'activation': 'tanh',
    'allele_dense_layer_sizes': [],
    'batch_normalization': False,
    'dense_layer_l1_regularization': 0.0,
    'dense_layer_l2_regularization': 0.0,
    'dropout_probability': 0.5,
    'early_stopping': True,
    'init': 'glorot_uniform',
    'layer_sizes': [32],
    'learning_rate': None,
    'locally_connected_layers': [],
    'loss': 'custom:mse_with_inequalities',
    'max_epochs': 5,
Tim O'Donnell's avatar
Tim O'Donnell committed
    'minibatch_size': 256,
    'optimizer': 'rmsprop',
    'output_activation': 'sigmoid',
    'patience': 10,
    'peptide_allele_merge_activation': '',
    'peptide_allele_merge_method': 'concatenate',
    'peptide_amino_acid_encoding': 'BLOSUM62',
    'peptide_dense_layer_sizes': [],
    'peptide_encoding': {
        'alignment_method': 'left_pad_centered_right_pad',
        'max_length': 15,
        'vector_encoding_name': 'BLOSUM62',
    },
    'random_negative_affinity_max': 50000.0,
    'random_negative_affinity_min': 20000.0,
    'random_negative_constant': 25,
    'random_negative_distribution_smoothing': 0.0,
    'random_negative_match_distribution': True,
    'random_negative_rate': 0.2,
Tim O'Donnell's avatar
Tim O'Donnell committed
    'train_data': {
Tim O'Donnell's avatar
Tim O'Donnell committed
        "pretrain": False,
Tim O'Donnell's avatar
Tim O'Donnell committed
        'pretrain_peptides_per_epoch': 128,
        'pretrain_max_epochs': 2,
        'pretrain_max_val_loss': 0.2,
Tim O'Donnell's avatar
Tim O'Donnell committed
    },
    'validation_split': 0.1,
},
Tim O'Donnell's avatar
Tim O'Donnell committed
]


def run_and_check(n_jobs=0, delete=True, additional_args=None):
    """
    Train pan-allele models on a small dataset, run model selection on the
    result, then load the selected predictor and sanity-check a prediction.

    Parameters
    ----------
    n_jobs : int
        Value passed as ``--num-jobs`` to both the training and the
        selection command.
    delete : bool
        If True, remove the temporary model directories when done. The
        directories are removed even when a command or assertion fails.
        Pass False to keep them for inspection.
    additional_args : list of str, optional
        Extra command-line arguments appended to both commands.

    Raises
    ------
    subprocess.CalledProcessError
        If either command exits with a non-zero status.
    AssertionError
        If the selected predictor fails one of the checks.
    """
    # Copy so that neither a shared default nor the caller's list is reused.
    extra_args = [] if additional_args is None else list(additional_args)

    models_dir = tempfile.mkdtemp(prefix="mhcflurry-test-models")
    models_dir_selected = None
    try:
        # JSON output is written to a ".yaml" file; presumably the training
        # command parses it as YAML, which accepts JSON syntax -- confirm.
        hyperparameters_filename = os.path.join(
            models_dir, "hyperparameters.yaml")
        with open(hyperparameters_filename, "w") as fd:
            json.dump(HYPERPARAMETERS_LIST, fd)

        # Restrict the curated data to HLA-A alleles to keep the run small.
        train_data_filename = os.path.join(models_dir, "_train_data.csv")
        data_df = pandas.read_csv(
            get_path(
                "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
        selected_data_df = data_df.loc[data_df.allele.str.startswith("HLA-A")]
        selected_data_df.to_csv(train_data_filename, index=False)

        args = [
            "mhcflurry-class1-train-pan-allele-models",
            "--data", train_data_filename,
            "--allele-sequences", get_path(
                "allele_sequences", "allele_sequences.csv"),
            "--hyperparameters", hyperparameters_filename,
            "--out-models-dir", models_dir,
            "--num-jobs", str(n_jobs),
            "--num-folds", "2",
            "--verbosity", "1",
        ] + extra_args
        print("Running with args: %s" % args)
        subprocess.check_call(args)

        # Run model selection
        models_dir_selected = tempfile.mkdtemp(
            prefix="mhcflurry-test-models-selected")
        args = [
            "mhcflurry-class1-select-pan-allele-models",
            # NOTE(review): this reads "train_data.csv.bz2" from models_dir,
            # not the "_train_data.csv" written above -- presumably the
            # training command writes that file into its output directory;
            # confirm.
            "--data", os.path.join(models_dir, "train_data.csv.bz2"),
            "--models-dir", models_dir,
            "--out-models-dir", models_dir_selected,
            "--max-models", "1",
            "--num-jobs", str(n_jobs),
        ] + extra_args
        print("Running with args: %s" % args)
        subprocess.check_call(args)

        result = Class1AffinityPredictor.load(
            models_dir_selected, optimization_level=0)
        # Presumably one selected network per fold (2 folds, max 1 model each).
        assert_equal(len(result.neural_networks), 2)
        predictions = result.predict(
            peptides=["SLYNTVATL"],
            alleles=["HLA-A*02:01"])
        assert_equal(predictions.shape, (1,))
        assert_array_less(predictions, 1000)
    finally:
        if delete:
            for path in (models_dir, models_dir_selected):
                # models_dir_selected is still None if training failed.
                if path is not None:
                    print("Deleting: %s" % path)
                    shutil.rmtree(path)
# The parallel test is only defined when the Keras backend is not theano
# (presumably the theano backend does not support the multi-job mode --
# confirm).
if os.environ.get("KERAS_BACKEND") != "theano":
    def test_run_parallel():
        """Run the train/select pipeline with ``--num-jobs`` 1 and then 2."""
        run_and_check(n_jobs=1)
        run_and_check(n_jobs=2)


def test_run_serial():
    """Run the train/select pipeline with ``--num-jobs 0``."""
    run_and_check(n_jobs=0)


def test_run_cluster_parallelism():
    """
    Run the train/select pipeline with the cluster-parallelism flags added
    to both commands, using '/tmp/' as the cluster results work directory.
    """
    cluster_flags = [
        '--cluster-parallelism',
        '--cluster-results-workdir', '/tmp/'
    ]
    run_and_check(n_jobs=0, additional_args=cluster_flags)


if __name__ == "__main__":
    # Manual entry point for running a single scenario by hand.
    # To keep the generated model directories for inspection, call
    # run_and_check(n_jobs=0, delete=False) here instead.
    test_run_cluster_parallelism()