Merge pull request #60 from hammerlab/additions2

Kim2014 comparison, mhcflurry-predict tool, reimplement parallelization with futures

Merge pull request #60 from hammerlab/additions2
Kim2014 comparison, mhcflurry-predict tool, reimplement parallelization with futures
eeb07c9a · Tim O'Donnell · GitHub · 770a1e21 · 00d545ec · eeb07c9a
Commit eeb07c9a, authored 8 years ago by Tim O'Donnell and committed by GitHub 8 years ago.
--- a/test/test_class1_allele_specific_cv_and_train_command.py
+++ b/test/test_class1_allele_specific_cv_and_train_command.py
@@ -58,9 +58,9 @@ def test_small_run():
        "--out-production-results", join(temp_dir, "production.csv"),
        "--out-models", join(temp_dir, "models"),
        "--cv-num-folds", "2",
-        "--joblib-num-jobs", "1",
        "--alleles", "HLA-A0201", "HLA-A0301",
        "--verbose",
+        "--num-local-threads", "1",
    ]
    print("Running cv_and_train_command with args: %s " % str(args))


--- a/test/test_cross_validation.py
+++ b/test/test_cross_validation.py
@@ -28,10 +28,7 @@ def test_imputation():
        n_folds=3,
        imputer=imputer,
        drop_similar_peptides=True,
-        alleles=["HLA-A0201", "HLA-A0202"],
-        n_jobs=2,
-        verbose=5,
-    )
+        alleles=["HLA-A0201", "HLA-A0202"])

    eq_(set(x.allele for x in folds), {"HLA-A0201", "HLA-A0202"})
    eq_(len(folds), 6)
@@ -70,11 +67,7 @@ def test_cross_validation_no_imputation():
        n_training_epochs=[3])
    print(models)

-    df = train_across_models_and_folds(
-        folds,
-        models,
-        n_jobs=2,
-        verbose=50)
+    df = train_across_models_and_folds(folds, models)
    print(df)
    assert df.test_auc.mean() > 0.6

@@ -92,10 +85,7 @@ def test_cross_validation_with_imputation():
        n_folds=3,
        imputer=imputer,
        drop_similar_peptides=True,
-        alleles=["HLA-A0201", "HLA-A0202"],
-        n_jobs=3,
-        verbose=5,
-    )
+        alleles=["HLA-A0201", "HLA-A0202"])

    eq_(set(x.allele for x in folds), {"HLA-A0201", "HLA-A0202"})
    eq_(len(folds), 6)
@@ -112,10 +102,6 @@ def test_cross_validation_with_imputation():
        n_training_epochs=[3])
    print(models)

-    df = train_across_models_and_folds(
-        folds,
-        models,
-        n_jobs=3,
-        verbose=5)
+    df = train_across_models_and_folds(folds, models)
    print(df)
    assert df.test_auc.mean() > 0.6
--- a/test/test_predict_command.py
+++ b/test/test_predict_command.py
+import tempfile
+import os
+
+import pandas
+from numpy.testing import assert_equal
+
+from mhcflurry import predict_command
+
+TEST_CSV = '''
+Allele,Peptide,Experiment
+HLA-A0201,SYNFEKKL,17
+HLA-B4403,AAAAAAAAA,17
+HLA-B4403,PPPPPPPP,18
+'''.strip()
+
+
+def test_csv():
+    """Run ``predict_command`` on a CSV input file and check the output shape.
+
+    ``TEST_CSV`` is written to a temporary file and passed to
+    ``predict_command.run`` together with the allele/peptide column names
+    and an ``--out`` path. The output CSV is read back and is expected to
+    have shape ``(3, 4)``. Both temporary files are removed afterwards.
+    """
+    args = ["--allele-column", "Allele", "--peptide-column", "Peptide"]
+    deletes = []
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as fd:
+            # Register for cleanup before writing so the file is removed
+            # even if the write itself fails.
+            deletes.append(fd.name)
+            fd.write(TEST_CSV.encode())
+        # Only the output path is needed: close the handle immediately so
+        # no descriptor is leaked and the file can be unlinked on platforms
+        # that refuse to delete open files.
+        with tempfile.NamedTemporaryFile(
+                delete=False, suffix=".csv") as fd_out:
+            deletes.append(fd_out.name)
+        full_args = [fd.name] + args + ["--out", fd_out.name]
+        print("Running with args: %s" % full_args)
+        predict_command.run(full_args)
+        result = pandas.read_csv(fd_out.name)
+        print(result)
+    finally:
+        for delete in deletes:
+            os.unlink(delete)
+
+    assert_equal(result.shape, (3, 4))
+
+
+def test_no_csv():
+    """Run ``predict_command`` with alleles and peptides given as arguments.
+
+    Two alleles and three peptides are passed on the command line, with
+    the prediction column named ``prediction``. The output CSV is expected
+    to have shape ``(6, 3)``, and the prediction for peptide ``SIINFEKL``
+    must be lower for ``H-2Kb`` than for ``HLA-A0201``. The temporary
+    output file is removed afterwards.
+    """
+    args = [
+        "--alleles", "HLA-A0201", "H-2Kb",
+        "--peptides", "SIINFEKL", "DENDREKLLL", "PICKLE",
+        "--prediction-column", "prediction",
+    ]
+
+    deletes = []
+    try:
+        # Only the output path is needed: close the handle immediately so
+        # no descriptor is leaked and the file can be unlinked on platforms
+        # that refuse to delete open files.
+        with tempfile.NamedTemporaryFile(
+                delete=False, suffix=".csv") as fd_out:
+            deletes.append(fd_out.name)
+        full_args = args + ["--out", fd_out.name]
+        print("Running with args: %s" % full_args)
+        predict_command.run(full_args)
+        result = pandas.read_csv(fd_out.name)
+        print(result)
+    finally:
+        for delete in deletes:
+            os.unlink(delete)
+
+    assert_equal(result.shape, (6, 3))
+    # ``.loc`` replaces ``.ix`` (removed in pandas 1.0); both lookups below
+    # are boolean-mask / label based, so the selection is unchanged.
+    sub_result1 = result.loc[result.peptide == "SIINFEKL"].set_index("allele")
+    assert (
+        sub_result1.loc["H-2Kb"].prediction <
+        sub_result1.loc["HLA-A0201"].prediction)