diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index 3e0abd0483da2095b8249ebb5918829f6d170e41..8538c4d3a9afb4f3b3b07b4de7c38c85f765be0a 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -1,7 +1,6 @@
-from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
-from mhcflurry.class1_neural_network import Class1NeuralNetwork
-
-__version__ = "1.1.0"
+from .class1_affinity_predictor import Class1AffinityPredictor
+from .class1_neural_network import Class1NeuralNetwork
+from .version import __version__
 
 __all__ = [
     "__version__",
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index ffd9744b1ba3c6d5077d12c0cc652936c118a412..b463b7b463e58d106d06eaef1251b4aa8af84fae 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -7,6 +7,8 @@ import time
 import warnings
 from os.path import join, exists
 from os import mkdir
+from socket import gethostname
+from getpass import getuser
 
 import mhcnames
 import numpy
@@ -20,6 +22,7 @@ from .downloads import get_path
 from .encodable_sequences import EncodableSequences
 from .percent_rank_transform import PercentRankTransform
 from .regression_target import to_ic50
+from .version import __version__
 
 
 class Class1AffinityPredictor(object):
@@ -193,7 +196,8 @@
         the configurations of each Class1NeuralNetwork, along with
         per-network files giving the model weights. If there are pan-allele
         predictors in the ensemble, the allele pseudosequences are also stored in the
-        directory.
+        directory. There is also a small file "info.txt" with basic metadata:
+        when the models were trained, by whom, on what host.
 
         Parameters
         ----------
@@ -234,6 +238,18 @@
         write_manifest_df.to_csv(manifest_path, index=False)
         logging.info("Wrote: %s" % manifest_path)
 
+        # Write "info.txt"
+        info_path = join(models_dir, "info.txt")
+        rows = [
+            ("trained on", time.asctime()),
+            ("package   ", "mhcflurry %s" % __version__),
+            ("hostname  ", gethostname()),
+            ("user      ", getuser()),
+        ]
+        pandas.DataFrame(rows).to_csv(
+            info_path, sep="\t", header=False, index=False)
+        print("Wrote: %s" % info_path)
+
         if self.allele_to_percent_rank_transform:
             percent_ranks_df = None
             for (allele, transform) in self.allele_to_percent_rank_transform.items():
diff --git a/mhcflurry/version.py b/mhcflurry/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..6849410aae0a8010e76d5f0a44ced13d750b0989
--- /dev/null
+++ b/mhcflurry/version.py
@@ -0,0 +1 @@
+__version__ = "1.1.0"
diff --git a/setup.py b/setup.py
index a66f0d8255cb82c5f978d1d019004797dd1f11b9..cfcb98f43d590a796776ab7ae5f1bbfef17ed770 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@ except:
     logging.warning("Conversion of long_description from MD to RST failed")
     pass
 
-with open('mhcflurry/__init__.py', 'r') as f:
+with open('mhcflurry/version.py', 'r') as f:
     version = re.search(
         r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
         f.read(),
diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py
index 8532b76a3662581f4e439b136af5ddd42def4a03..40ac8c2f9ea22b1b6ad80495da061bd6b0acf5c5 100644
--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -51,7 +51,7 @@ def test_a1_known_epitopes_in_newly_trained_model():
     allele = "HLA-A*01:01"
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         (df.allele == allele) &
         (df.peptide.str.len() >= 8) &
@@ -137,7 +137,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
 
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         df.allele == allele
     ]
diff --git a/test/test_class1_neural_network.py b/test/test_class1_neural_network.py
index f3d4ca4980c82d22d8c078a281a55a8328d6947e..b082f28cf89276bf842cc427d98ad7defab0985a 100644
--- a/test/test_class1_neural_network.py
+++ b/test/test_class1_neural_network.py
@@ -37,7 +37,7 @@ def test_class1_neural_network_a0205_training_accuracy():
 
     df = pandas.read_csv(
         get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
     df = df.ix[
         df.allele == allele
     ]
diff --git a/test/test_train_allele_specific_models_command.py b/test/test_train_allele_specific_models_command.py
index affcde7569b0ae5766ae554f50a73122f3e3894f..1489d1d1baf16d7b19fc5189560bd19f04ef4649 100644
--- a/test/test_train_allele_specific_models_command.py
+++ b/test/test_train_allele_specific_models_command.py
@@ -57,12 +57,13 @@ def run_and_check(n_jobs=0):
         json.dump(HYPERPARAMETERS, fd)
 
     args = [
-        "--data", get_path("data_curated", "curated_training_data.csv.bz2"),
+        "--data", get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2"),
         "--hyperparameters", hyperparameters_filename,
         "--allele", "HLA-A*02:01", "HLA-A*01:01", "HLA-A*03:01",
         "--out-models-dir", models_dir,
         "--percent-rank-calibration-num-peptides-per-length", "10000",
         "--parallelization-num-jobs", str(n_jobs),
+        "--ignore-inequalities",
     ]
    print("Running with args: %s" % args)
     train_allele_specific_models_command.run(args)