fix tests

6e44ee45 · Tim O'Donnell · 99a3ea81 · 6e44ee45 · 6e44ee45 · 6e44ee45
Commit 6e44ee45 authored 7 years ago by Tim O'Donnell
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
-from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
-from mhcflurry.class1_neural_network import Class1NeuralNetwork
-
-__version__ = "1.1.0"
+from .class1_affinity_predictor import Class1AffinityPredictor
+from .class1_neural_network import Class1NeuralNetwork
+from .version import __version__

 __all__ = [
    "__version__",

--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -7,6 +7,8 @@ import time
 import warnings
 from os.path import join, exists
 from os import mkdir
+from socket import gethostname
+from getpass import getuser

 import mhcnames
 import numpy
@@ -20,6 +22,7 @@ from .downloads import get_path
 from .encodable_sequences import EncodableSequences
 from .percent_rank_transform import PercentRankTransform
 from .regression_target import to_ic50
+from .version import __version__


 class Class1AffinityPredictor(object):
@@ -193,7 +196,8 @@ class Class1AffinityPredictor(object):
        the configurations of each Class1NeuralNetwork, along with per-network
        files giving the model weights. If there are pan-allele predictors in
        the ensemble, the allele pseudosequences are also stored in the
-        directory.
+        directory. There is also a small file "info.txt" with basic metadata:
+        when the models were trained, by whom, on what host.
        
        Parameters
        ----------
@@ -234,6 +238,18 @@ class Class1AffinityPredictor(object):
        write_manifest_df.to_csv(manifest_path, index=False)
        logging.info("Wrote: %s" % manifest_path)

+        # Write "info.txt"
+        info_path = join(models_dir, "info.txt")
+        rows = [
+            ("trained on", time.asctime()),
+            ("package   ", "mhcflurry %s" % __version__),
+            ("hostname  ", gethostname()),
+            ("user      ", getuser()),
+        ]
+        pandas.DataFrame(rows).to_csv(
+            info_path, sep="\t", header=False, index=False)
+        print("Wrote: %s" % info_path)
+
        if self.allele_to_percent_rank_transform:
            percent_ranks_df = None
            for (allele, transform) in self.allele_to_percent_rank_transform.items():

--- a/mhcflurry/version.py
+++ b/mhcflurry/version.py
+__version__ = "1.1.0"
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@ except:
    logging.warning("Conversion of long_description from MD to RST failed")
    pass

-with open('mhcflurry/__init__.py', 'r') as f:
+with open('mhcflurry/version.py', 'r') as f:
    version = re.search(
        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
        f.read(),

--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -51,7 +51,7 @@ def test_a1_known_epitopes_in_newly_trained_model():
    allele = "HLA-A*01:01"
    df = pandas.read_csv(
        get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
    df = df.ix[
        (df.allele == allele) &
        (df.peptide.str.len() >= 8) &
@@ -137,7 +137,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():

    df = pandas.read_csv(
        get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
    df = df.ix[
        df.allele == allele
    ]

--- a/test/test_class1_neural_network.py
+++ b/test/test_class1_neural_network.py
@@ -37,7 +37,7 @@ def test_class1_neural_network_a0205_training_accuracy():

    df = pandas.read_csv(
        get_path(
-            "data_curated", "curated_training_data.csv.bz2"))
+            "data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
    df = df.ix[
        df.allele == allele
    ]

--- a/test/test_train_allele_specific_models_command.py
+++ b/test/test_train_allele_specific_models_command.py
@@ -57,12 +57,13 @@ def run_and_check(n_jobs=0):
        json.dump(HYPERPARAMETERS, fd)

    args = [
-        "--data", get_path("data_curated", "curated_training_data.csv.bz2"),
+        "--data", get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2"),
        "--hyperparameters", hyperparameters_filename,
        "--allele", "HLA-A*02:01", "HLA-A*01:01", "HLA-A*03:01",
        "--out-models-dir", models_dir,
        "--percent-rank-calibration-num-peptides-per-length", "10000",
        "--parallelization-num-jobs", str(n_jobs),
+        "--ignore-inequalities",
    ]
    print("Running with args: %s" % args)
    train_allele_specific_models_command.run(args)