diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py index 4420dcbbfd919085e5bb9e82d0573f4ad11969a4..dbb0b2e2aeac63fa5588026c502efeb2643853f3 100644 --- a/mhcflurry/__init__.py +++ b/mhcflurry/__init__.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .class1_affinity_prediction.class1_binding_predictor import ( - Class1BindingPredictor) -from .class1_affinity_prediction.multi_allele_predictor_ensemble import ( - MultiAllelePredictorEnsemble) +from .class1_affinity_prediction.class1_neural_network import ( + Class1NeuralNetwork) +from .class1_affinity_prediction.class1_affinity_predictor import ( + Class1AffinityPredictor) __version__ = "0.2.0" __all__ = [ - "Class1BindingPredictor", - "MultiAllelePredictorEnsemble", + "Class1NeuralNetwork", + "Class1AffinityPredictor", "__version__", ] diff --git a/mhcflurry/antigen_presentation/presentation_component_models/mhcflurry_trained_on_hits.py b/mhcflurry/antigen_presentation/presentation_component_models/mhcflurry_trained_on_hits.py index 58d0101e32535a48083b26d23bd3243d547237ea..9506e4e45ae2adb42ecf553df3622baeb900bdee 100644 --- a/mhcflurry/antigen_presentation/presentation_component_models/mhcflurry_trained_on_hits.py +++ b/mhcflurry/antigen_presentation/presentation_component_models/mhcflurry_trained_on_hits.py @@ -4,7 +4,7 @@ import pandas from numpy import log, exp, nanmean from ...affinity_measurement_dataset import AffinityMeasurementDataset -from ...class1_affinity_prediction import Class1BindingPredictor +from ...class1_affinity_prediction import Class1NeuralNetwork from ...common import normalize_allele_name from .mhc_binding_component_model_base import MHCBindingComponentModelBase @@ -99,7 +99,7 @@ class MHCflurryTrainedOnHits(MHCBindingComponentModelBase): dataset = AffinityMeasurementDataset( df.sample(frac=1)) # shuffle dataframe print("Train data: ", dataset) - model = Class1BindingPredictor( + model = Class1NeuralNetwork( **self.mhcflurry_hyperparameters) model.fit_dataset(dataset, verbose=True) self.allele_to_model[allele] = model diff --git a/mhcflurry/class1_affinity_prediction/__init__.py b/mhcflurry/class1_affinity_prediction/__init__.py index 7deec3d17cba797bc79e69834d742929cf03d476..96707e88331792c6a4cc0d0214ef656b7522bfff 100644 --- a/mhcflurry/class1_affinity_prediction/__init__.py +++ b/mhcflurry/class1_affinity_prediction/__init__.py @@ -1,7 +1,9 @@ from __future__ import absolute_import -from .class1_binding_predictor import Class1BindingPredictor +from .class1_neural_network import Class1NeuralNetwork +from .class1_affinity_predictor import Class1AffinityPredictor __all__ = [ - 'Class1BindingPredictor', + 'Class1NeuralNetwork', + 'Class1AffinityPredictor', ] diff --git a/mhcflurry/class1_affinity_prediction/multi_allele_predictor_ensemble.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py similarity index 98% rename from mhcflurry/class1_affinity_prediction/multi_allele_predictor_ensemble.py rename to mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py index 4e8b965ca4ade19be20f44023591931fefbcdb54..7c6fb5ef88c35cbf4a7832882f8217ca359b4b9f 100644 --- a/mhcflurry/class1_affinity_prediction/multi_allele_predictor_ensemble.py +++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py @@ -11,10 +11,10 @@ import mhcnames from ..encodable_sequences import EncodableSequences -from .class1_binding_predictor import Class1BindingPredictor +from .class1_neural_network import Class1NeuralNetwork -class MultiAllelePredictorEnsemble(object): +class Class1AffinityPredictor(object): def __init__( self, allele_to_allele_specific_models={}, @@ -117,7 +117,7 @@ class MultiAllelePredictorEnsemble(object): for (allele, v) in sorted(allele_to_allele_specific_models.items())))) - result = MultiAllelePredictorEnsemble( + result = Class1AffinityPredictor( allele_to_allele_specific_models=allele_to_allele_specific_models, class1_pan_allele_models=class1_pan_allele_models, allele_to_pseudosequence=pseudosequences, @@ -226,7 +226,7 @@ class MultiAllelePredictorEnsemble(object): output_assignments = ["output"] * len(encodable_peptides.sequences) for i in range(n_models): print("Training model %d / %d" % (i + 1, n_models)) - model = Class1BindingPredictor(**architecture_hyperparameters) + model = Class1NeuralNetwork(**architecture_hyperparameters) model.fit( encodable_peptides, affinities, diff --git a/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py b/mhcflurry/class1_affinity_prediction/class1_neural_network.py similarity index 99% rename from mhcflurry/class1_affinity_prediction/class1_binding_predictor.py rename to mhcflurry/class1_affinity_prediction/class1_neural_network.py index 04b87d065f8d25abd29b24fafad6689f7a7ecd81..df8e767c46060fbef92a77cecb2b4d3847972529 100644 --- a/mhcflurry/class1_affinity_prediction/class1_binding_predictor.py +++ b/mhcflurry/class1_affinity_prediction/class1_neural_network.py @@ -21,7 +21,7 @@ from ..regression_target import to_ic50, from_ic50 from ..common import random_peptides, amino_acid_distribution -class Class1BindingPredictor(object): +class Class1NeuralNetwork(object): network_hyperparameter_defaults = HyperparameterDefaults( kmer_size=15, use_embedding=True, diff --git a/mhcflurry/class1_affinity_prediction/cv_and_train_command.py b/mhcflurry/class1_affinity_prediction/cv_and_train_command.py deleted file mode 100644 index 3d0159f4b1d1f6bb3c37783a92dae7a89fe40bd2..0000000000000000000000000000000000000000 --- a/mhcflurry/class1_affinity_prediction/cv_and_train_command.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) 2016. Mount Sinai School of Medicine -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -Class1 allele-specific cross validation and training script. - -What it does: - * Run cross validation on a dataset over the specified model architectures - * Select the best architecture for each allele - * Re-train the best architecture on the full data for that allele - * Test "production" predictors on a held-out test set if available - -Features: - * Supports imputation as a hyperparameter that can be searched over - * Parallelized with concurrent.futures - -Note: - -The parallelization is primary intended to be used with an -alternative concurrent.futures Executor such as dask-distributed that supports -multi-node parallelization. Theano in particular seems to have deadlocks -when running with single-node parallelization. -''' -from __future__ import ( - print_function, - division, - absolute_import, -) -import sys -import argparse -import json -import logging -import time -import os -import socket -import hashlib -import pickle - -import numpy - -from .. import parallelism -from ..affinity_measurement_dataset import AffinityMeasurementDataset -from ..imputation_helpers import imputer_from_name -from .cross_validation import cross_validation_folds -from .train import ( - impute_and_select_allele, - train_across_models_and_folds, - AlleleSpecificTrainTestFold) - -parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - -parser.add_argument( - "--train-data", - metavar="X.csv", - required=True, - help="Training data") - -parser.add_argument( - "--test-data", - metavar="X.csv", - help="Optional test data") - -parser.add_argument( - "--model-architectures", - metavar="X.json", - type=argparse.FileType('r'), - required=True, - help="JSON file giving model architectures to assess in cross validation." - " Can be - to read from stdin") - -parser.add_argument( - "--imputer-description", - metavar="X.json", - type=argparse.FileType('r'), - help="JSON. Can be - to read from stdin") - -parser.add_argument( - "--alleles", - metavar="ALLELE", - nargs="+", - default=None, - help="Use only the specified alleles") - -parser.add_argument( - "--out-cv-results", - metavar="X.csv", - help="Write cross validation results to the given file") - -parser.add_argument( - "--out-production-results", - metavar="X.csv", - help="Write production model information to the given file") - -parser.add_argument( - "--out-models-dir", - metavar="DIR", - help="Write production models to files in this dir") - -parser.add_argument( - "--max-models", - type=int, - metavar="N", - help="Use only the first N models") - -parser.add_argument( - "--cv-num-folds", - type=int, - default=3, - metavar="N", - help="Number of cross validation folds. Default: %(default)s") - -parser.add_argument( - "--cv-folds-per-task", - type=int, - default=10, - metavar="N", - help="When parallelizing cross validation, each task trains one model " - "architecture on N folds. Set to 1 for maximum potential parallelism. " - "This is less efficient if you have limited workers, however, since " - "the model must get compiled for each task. Default: %(default)s.") - -parser.add_argument( - "--dask-scheduler", - metavar="HOST:PORT", - help="Host and port of dask distributed scheduler") - -parser.add_argument( - "--num-local-processes", - metavar="N", - type=int, - help="Processes (exclusive with --dask-scheduler and --num-local-threads)") - -parser.add_argument( - "--num-local-threads", - metavar="N", - type=int, - default=1, - help="Threads (exclusive with --dask-scheduler and --num-local-processes)") - -parser.add_argument( - "--min-samples-per-allele", - default=100, - metavar="N", - help="Don't train predictors for alleles with fewer than N samples. " - "Set to 0 to disable filtering. Default: %(default)s", - type=int) - -parser.add_argument( - "--quiet", - action="store_true", - default=False, - help="Output less info") - -parser.add_argument( - "--verbose", - action="store_true", - default=False, - help="Output more info") - -try: - import kubeface - kubeface.Client.add_args(parser) -except ImportError: - logging.error("Kubeface support disabled, not installed.") - - -def run(argv=sys.argv[1:]): - args = parser.parse_args(argv) - if args.verbose: - logging.root.setLevel(level="DEBUG") - elif not args.quiet: - logging.root.setLevel(level="INFO") - - logging.info("Running with arguments: %s" % args) - - # Set parallel backend - if args.dask_scheduler: - backend = parallelism.DaskDistributedParallelBackend( - args.dask_scheduler) - elif hasattr(args, 'storage_prefix') and args.storage_prefix: - backend = parallelism.KubefaceParallelBackend(args) - else: - if args.num_local_processes: - backend = parallelism.ConcurrentFuturesParallelBackend( - args.num_local_processes, - processes=True) - else: - backend = parallelism.ConcurrentFuturesParallelBackend( - args.num_local_threads, - processes=False) - - parallelism.set_default_backend(backend) - logging.info("Using parallel backend: %s" % backend) - go(args) - - -def go(args): - backend = parallelism.get_default_backend() - - model_architectures = json.loads(args.model_architectures.read()) - logging.info("Read %d model architectures" % len(model_architectures)) - if args.max_models: - model_architectures = model_architectures[:args.max_models] - logging.info( - "Subselected to %d model architectures" % len(model_architectures)) - - train_data = AffinityMeasurementDataset.from_csv(args.train_data) - logging.info("Loaded training dataset: %s" % train_data) - - test_data = None - if args.test_data: - test_data = AffinityMeasurementDataset.from_csv(args.test_data) - logging.info("Loaded testing dataset: %s" % test_data) - - if args.min_samples_per_allele: - train_data = train_data.filter_alleles_by_count( - args.min_samples_per_allele) - logging.info( - "Filtered training dataset to alleles with >= %d observations: %s" - % (args.min_samples_per_allele, train_data)) - - if any(x['impute'] for x in model_architectures): - if not args.imputer_description: - parser.error( - "--imputer-description is required when any models " - "use imputation") - imputer_description = json.load(args.imputer_description) - logging.info("Loaded imputer description: %s" % imputer_description) - imputer_kwargs_defaults = { - 'min_observations_per_peptide': 2, - 'min_observations_per_allele': 10, - } - impute_kwargs = dict( - (key, imputer_description.pop(key, default)) - for (key, default) in imputer_kwargs_defaults.items()) - - imputer = imputer_from_name(**imputer_description) - else: - imputer = None - impute_kwargs = {} - - logging.info( - "Generating cross validation folds. Imputation: %s" % - ("yes" if imputer else "no")) - cv_folds = cross_validation_folds( - train_data, - n_folds=args.cv_num_folds, - imputer=imputer, - impute_kwargs=impute_kwargs, - drop_similar_peptides=True, - alleles=args.alleles) - - logging.info( - "Training %d model architectures across %d folds = %d models" - % ( - len(model_architectures), - len(cv_folds), - len(model_architectures) * len(cv_folds))) - start = time.time() - cv_results = train_across_models_and_folds( - cv_folds, - model_architectures, - folds_per_task=args.cv_folds_per_task) - logging.info( - "Completed cross validation in %0.2f seconds" % (time.time() - start)) - - cv_results["summary_score"] = ( - cv_results.test_auc.fillna(0) + - cv_results.test_tau.fillna(0) + - cv_results.test_f1.fillna(0)) - - allele_and_model_to_ranks = {} - for allele in cv_results.allele.unique(): - model_ranks = ( - cv_results.ix[cv_results.allele == allele] - .groupby("model_num") - .summary_score - .mean() - .rank(method='first', ascending=False, na_option="top") - .astype(int)) - allele_and_model_to_ranks[allele] = model_ranks.to_dict() - - cv_results["summary_rank"] = [ - allele_and_model_to_ranks[row.allele][row.model_num] - for (_, row) in cv_results.iterrows() - ] - - if args.out_cv_results: - cv_results.to_csv(args.out_cv_results, index=False) - print("Wrote: %s" % args.out_cv_results) - - numpy.testing.assert_equal( - set(cv_results.summary_rank), - set(1 + numpy.arange(len(model_architectures)))) - - best_architectures_by_allele = ( - cv_results.ix[cv_results.summary_rank == 1] - .set_index("allele") - .model_num - .to_dict()) - - logging.info("") - train_folds = [] - train_models = [] - imputation_args_list = [] - best_architectures = [] - for (allele_num, allele) in enumerate(cv_results.allele.unique()): - best_index = best_architectures_by_allele[allele] - architecture = model_architectures[best_index] - best_architectures.append(architecture) - train_models.append(architecture) - logging.info( - "Allele: %s best architecture is index %d: %s" % - (allele, best_index, architecture)) - - if architecture['impute']: - imputation_args = dict(impute_kwargs) - imputation_args.update(dict( - dataset=train_data, - imputer=imputer, - allele=allele)) - imputation_args_list.append(imputation_args) - - test_data_this_allele = None - if test_data is not None: - test_data_this_allele = test_data.get_allele(allele) - fold = AlleleSpecificTrainTestFold( - allele=allele, - train=train_data.get_allele(allele), - imputed_train=None, - test=test_data_this_allele) - train_folds.append(fold) - - if imputation_args_list: - imputation_results = list(backend.map( - lambda kwargs: impute_and_select_allele(**kwargs), - imputation_args_list)) - - new_train_folds = [] - for (best_architecture, train_fold) in zip( - best_architectures, train_folds): - imputed_train = None - if best_architecture['impute']: - imputed_train = imputation_results.pop(0) - new_train_folds.append( - train_fold._replace(imputed_train=imputed_train)) - assert not imputation_results - - train_folds = new_train_folds - - logging.info("Training %d production models" % len(train_folds)) - start = time.time() - train_results = train_across_models_and_folds( - train_folds, - train_models, - cartesian_product_of_folds_and_models=False, - return_predictors=args.out_models_dir is not None) - logging.info( - "Completed production training in %0.2f seconds" - % (time.time() - start)) - - if args.out_models_dir: - predictor_names = [] - run_name = (hashlib.sha1( - ("%s-%f" % (socket.gethostname(), time.time())).encode()) - .hexdigest()[:8]) - for (_, row) in train_results.iterrows(): - predictor_name = "-".join(str(x) for x in [ - row.allele, - "impute" if row.model_impute else "noimpute", - "then".join(str(s) for s in row.model_layer_sizes), - "dropout%g" % row.model_dropout_probability, - "fracneg%g" % row.model_fraction_negative, - run_name, - ]).replace(".", "_") - predictor_names.append(predictor_name) - out_path = os.path.join( - args.out_models_dir, predictor_name + ".pickle") - with open(out_path, "wb") as fd: - # Use this protocol so we have Python 2 compatability. - pickle.dump(row.predictor, fd, protocol=2) - print("Wrote: %s" % out_path) - del train_results["predictor"] - train_results["predictor_name"] = predictor_names - - if args.out_production_results: - train_results.to_csv(args.out_production_results, index=False) - print("Wrote: %s" % args.out_production_results) diff --git a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py index e8a67737eb989c0a6df1142abd7a9d0a40703e71..4b4201915e8700b6fe633f94db3d2b711edd0d5c 100644 --- a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py +++ b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py @@ -13,7 +13,7 @@ import pandas import mhcnames -from .class1_binding_predictor import Class1BindingPredictor +from .class1_neural_network import Class1NeuralNetwork from ..common import configure_logging def normalize_allele_name(s): @@ -122,7 +122,7 @@ def run(): train_data = df.ix[df.allele == allele].dropna().sample( frac=1.0) - model = Class1BindingPredictor( + model = Class1NeuralNetwork( verbose=args.verbosity, **hyperparameters) diff --git a/mhcflurry/predict_command.py b/mhcflurry/predict_command.py index 2231c2a2f34f1cdba74acd6d92432b9567f1d811..58c37c0c21bd48306013d6108ed23922495c888e 100644 --- a/mhcflurry/predict_command.py +++ b/mhcflurry/predict_command.py @@ -46,7 +46,8 @@ import pandas import itertools from .downloads import get_path -from . import class1_affinity_prediction, class1_allele_specific_ensemble +from .class1_affinity_prediction import Class1AffinityPredictor + parser = argparse.ArgumentParser( description=__doc__, @@ -94,24 +95,11 @@ parser.add_argument( help="Output column name for predictions. Default: '%(default)s'") parser.add_argument( - "--predictor", - choices=("class1-allele-specific-single", "class1-allele-specific-ensemble"), - default="class1-allele-specific-ensemble", - help="Predictor to use. Default: %(default)s.") - -parser.add_argument( - "--models-class1-allele-specific-ensemble", - metavar="DIR", - default=None, - help="Directory containing class1 allele specific ensemble models. " - "Default: %s" % get_path("models_class1_allele_specific_ensemble", test_exists=False)) - -parser.add_argument( - "--models-class1-allele-specific-single", + "--models", metavar="DIR", default=None, - help="Directory containing class1 allele specific single models. " - "Default: %s" % get_path("models_class1_allele_specific_single", test_exists=False)) + help="Directory containing models. " + "Default: %s" % get_path("models_class1", test_exists=False)) def run(argv=sys.argv[1:]): @@ -155,29 +143,13 @@ def run(argv=sys.argv[1:]): print("Predicting for %d alleles and %d peptides = %d predictions" % ( len(args.alleles), len(args.peptides), len(df))) - if args.predictor == "class1-allele-specific-single": - models_dir = args.models_class1_allele_specific_single - if models_dir is None: - # The reason we set the default here instead of in the argument parser is that - # we want to test_exists at this point, so the user gets a message instructing - # them to download the models if needed. - models_dir = get_path("models_class1_allele_specific_single") - predictor = ( - class1_affinity_prediction - .class1_single_model_multi_allele_predictor - .Class1SingleModelMultiAllelePredictor - ).load_from_download_directory(models_dir) - elif args.predictor == "class1-allele-specific-ensemble": - models_dir = args.models_class1_allele_specific_ensemble - if models_dir is None: - models_dir = get_path("models_class1_allele_specific_ensemble") - predictor = ( - class1_allele_specific_ensemble - .class1_ensemble_multi_allele_predictor - .Class1EnsembleMultiAllelePredictor - ).load_from_download_directory(models_dir) - else: - assert False + models_dir = args.models + if models_dir is None: + # The reason we set the default here instead of in the argument parser is that + # we want to test_exists at this point, so the user gets a message instructing + # them to download the models if needed. + models_dir = get_path("models_class1") + predictor = Class1AffinityPredictor.load(models_dir) predictions = {} # allele -> peptide -> value for (allele, sub_df) in df.groupby(args.allele_column): diff --git a/test/dummy_models.py b/test/dummy_models.py index 0ea99cca80b5d8e71cb27e93c88f69d08fe6b6f1..5fb9facc3bcf6c9dc0d109201a7a222c93c5ebf4 100644 --- a/test/dummy_models.py +++ b/test/dummy_models.py @@ -13,7 +13,7 @@ # limitations under the License. import numpy as np -from mhcflurry import Class1BindingPredictor +from mhcflurry import Class1NeuralNetwork class Dummy9merIndexEncodingModel(object): """ @@ -30,19 +30,19 @@ class Dummy9merIndexEncodingModel(object): n_cols,) return np.ones(n_rows, dtype=float) * self.constant_output_value -always_zero_predictor_with_unknown_AAs = Class1BindingPredictor( +always_zero_predictor_with_unknown_AAs = Class1NeuralNetwork( model=Dummy9merIndexEncodingModel(0), allow_unknown_amino_acids=True) -always_zero_predictor_without_unknown_AAs = Class1BindingPredictor( +always_zero_predictor_without_unknown_AAs = Class1NeuralNetwork( model=Dummy9merIndexEncodingModel(0), allow_unknown_amino_acids=False) -always_one_predictor_with_unknown_AAs = Class1BindingPredictor( +always_one_predictor_with_unknown_AAs = Class1NeuralNetwork( model=Dummy9merIndexEncodingModel(1), allow_unknown_amino_acids=True) -always_one_predictor_without_unknown_AAs = Class1BindingPredictor( +always_one_predictor_without_unknown_AAs = Class1NeuralNetwork( model=Dummy9merIndexEncodingModel(1), allow_unknown_amino_acids=False) diff --git a/test/test_class1_binding_predictor_A0205.py b/test/test_class1_binding_predictor_A0205.py index 451d3ca734cb860655edf04fe4577eae54ef4a7e..9010e99a703f0a846321e19c4a33164fba631df5 100644 --- a/test/test_class1_binding_predictor_A0205.py +++ b/test/test_class1_binding_predictor_A0205.py @@ -2,7 +2,7 @@ import numpy import pandas numpy.random.seed(0) -from mhcflurry import Class1BindingPredictor +from mhcflurry import Class1NeuralNetwork from nose.tools import eq_ from numpy import testing @@ -25,7 +25,7 @@ def test_class1_binding_predictor_A0205_training_accuracy(): df.measurement_source == "kim2014" ] - predictor = Class1BindingPredictor( + predictor = Class1NeuralNetwork( activation="tanh", layer_sizes=[64], max_epochs=1000, # Memorize the dataset. diff --git a/test/test_hyperparameters.py b/test/test_hyperparameters.py index a840bbf7b458e3c3c7fa3cb0f0eafdab9c834c57..acc0d417498c60cc01132a749abc030cadff180f 100644 --- a/test/test_hyperparameters.py +++ b/test/test_hyperparameters.py @@ -1,6 +1,6 @@ from numpy.testing import assert_equal -from mhcflurry.class1_affinity_prediction import Class1BindingPredictor +from mhcflurry.class1_affinity_prediction import Class1NeuralNetwork def test_all_combinations_of_hyperparameters(): @@ -8,7 +8,7 @@ def test_all_combinations_of_hyperparameters(): activation=["tanh", "sigmoid"], fraction_negative=[0, 0.2]) results = ( - Class1BindingPredictor + Class1NeuralNetwork .hyperparameter_defaults .models_grid(**combinations_dict)) assert_equal(len(results), 4) diff --git a/test/test_imputation.py b/test/test_imputation.py index d30f12f7229475ccbed003e6cda8ffef204450ec..8376801851d20670808c83dcd7a5e680d1dcb308 100644 --- a/test/test_imputation.py +++ b/test/test_imputation.py @@ -3,7 +3,7 @@ np.random.seed(0) from mhcflurry.imputation_helpers import imputer_from_name from mhcflurry.affinity_measurement_dataset import AffinityMeasurementDataset -from mhcflurry import Class1BindingPredictor +from mhcflurry import Class1NeuralNetwork from fancyimpute import MICE, KNN, SoftImpute, IterativeSVD from nose.tools import eq_ @@ -54,7 +54,7 @@ def test_performance_improves_for_A0205_with_pretraining(): print("AffinityMeasurementDataset with only A0205, # entries: %d" % ( len(a0205_data_without_imputation))) - predictor_without_imputation = Class1BindingPredictor( + predictor_without_imputation = Class1NeuralNetwork( name="A0205-no-impute") X_index, ic50_true, sample_weights, _ = ( @@ -97,7 +97,7 @@ def test_performance_improves_for_A0205_with_pretraining(): assert sample_weights_imputed.max() <= 1, sample_weights_imputed.max() assert ic50_imputed.min() >= 0, ic50_imputed.min() - predictor_with_imputation = Class1BindingPredictor(name="A0205-impute") + predictor_with_imputation = Class1NeuralNetwork(name="A0205-impute") predictor_with_imputation.fit_kmer_encoded_arrays( X=X_index, diff --git a/test/test_serialization.py b/test/test_serialization.py index 98870bb89eb0b66fb565aa06f039757bfe3f7f1a..e06ffc2cc366be971e906a5111666698cfedb529 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -1,13 +1,13 @@ import pickle import numpy as np -from mhcflurry.class1_affinity_prediction import Class1BindingPredictor +from mhcflurry.class1_affinity_prediction import Class1NeuralNetwork def test_predict_after_saving_model_to_disk(): # don't even bother fitting the model, just save its random weights # and check we get the same predictions back afterward - model = Class1BindingPredictor(name="rando") + model = Class1NeuralNetwork(name="rando") peptides = ["A" * 9, "C" * 9] original_predictions = model.predict(peptides)