Merge pull request #145 from openvax/v1.3

pan allele prediction (MHCflurry 1.3.0)

Merge pull request #145 from openvax/v1.3
pan allele prediction (MHCflurry 1.3.0)
18d0bd99 · Tim O'Donnell · GitHub · 74b751e6 · 3cbdfd69 · 18d0bd99
Unverified commit 18d0bd99, authored 5 years ago by Tim O'Donnell and committed by GitHub 5 years ago.
--- a/mhcflurry/percent_rank_transform.py
+++ b/mhcflurry/percent_rank_transform.py
+"""
+Class for transforming arbitrary values into percent ranks given a distribution.
+"""
 import numpy
 import pandas

+
 class PercentRankTransform(object):
    """
    Transform arbitrary values into percent ranks.
@@ -76,8 +80,3 @@ class PercentRankTransform(object):
        result.cdf = series.values
        result.bin_edges = series.index.values[1:-1]
        return result
-
-
-
-
-
--- a/mhcflurry/predict_command.py
+++ b/mhcflurry/predict_command.py
@@ -149,6 +149,10 @@ implementation_args.add_argument(


 def run(argv=sys.argv[1:]):
+    if not argv:
+        parser.print_help()
+        parser.exit(1)
+
    args = parser.parse_args(argv)

    set_keras_backend(backend=args.backend, num_threads=args.threads)
@@ -159,9 +163,9 @@ def run(argv=sys.argv[1:]):

    models_dir = args.models
    if models_dir is None:
-        # The reason we set the default here instead of in the argument parser is that
-        # we want to test_exists at this point, so the user gets a message instructing
-        # them to download the models if needed.
+        # The reason we set the default here instead of in the argument parser
+        # is that we want to test_exists at this point, so the user gets a
+        # message instructing them to download the models if needed.
        models_dir = get_default_class1_models_dir(test_exists=True)
    predictor = Class1AffinityPredictor.load(models_dir)

@@ -215,12 +219,13 @@ def run(argv=sys.argv[1:]):
        })
        logging.info(
            "Predicting for %d alleles and %d peptides = %d predictions" % (
-            len(args.alleles), len(args.peptides), len(df)))
+                len(args.alleles), len(args.peptides), len(df)))

    predictions = predictor.predict_to_dataframe(
        peptides=df[args.peptide_column].values,
        alleles=df[args.allele_column].values,
-        include_individual_model_predictions=args.include_individual_model_predictions,
+        include_individual_model_predictions=(
+            args.include_individual_model_predictions),
        throw=not args.no_throw)

    for col in predictions.columns:

--- a/mhcflurry/regression_target.py
+++ b/mhcflurry/regression_target.py
@@ -14,7 +14,7 @@ def from_ic50(ic50, max_ic50=50000.0):
    numpy.array of float

    """
-    x = 1.0 - (numpy.log(ic50) / numpy.log(max_ic50))
+    x = 1.0 - (numpy.log(numpy.maximum(ic50, 1e-12)) / numpy.log(max_ic50))
    return numpy.minimum(
        1.0,
        numpy.maximum(0.0, x))

--- a/mhcflurry/scoring.py
+++ b/mhcflurry/scoring.py
+"""
+Measures of prediction accuracy
+"""
 from __future__ import (
    print_function,
    division,

--- a/mhcflurry/select_allele_specific_models_command.py
+++ b/mhcflurry/select_allele_specific_models_command.py
@@ -22,7 +22,7 @@ tqdm.monitor_interval = 0  # see https://github.com/tqdm/tqdm/issues/481
 from .class1_affinity_predictor import Class1AffinityPredictor
 from .encodable_sequences import EncodableSequences
 from .common import configure_logging, random_peptides
-from .parallelism import worker_pool_with_gpu_assignments_from_args, add_worker_pool_args
+from .local_parallelism import worker_pool_with_gpu_assignments_from_args, add_local_parallelism_args
 from .regression_target import from_ic50


@@ -176,7 +176,7 @@ parser.add_argument(
    help="Keras verbosity. Default: %(default)s",
    default=0)

-add_worker_pool_args(parser)
+add_local_parallelism_args(parser)


 def run(argv=sys.argv[1:]):
@@ -205,7 +205,7 @@ def run(argv=sys.argv[1:]):
        df = pandas.read_csv(args.data)
        print("Loaded data: %s" % (str(df.shape)))

-        df = df.ix[
+        df = df.loc[
            (df.peptide.str.len() >= 8) & (df.peptide.str.len() <= 15)
        ]
        print("Subselected to 8-15mers: %s" % (str(df.shape)))

--- a/mhcflurry/select_pan_allele_models_command.py
+++ b/mhcflurry/select_pan_allele_models_command.py
--- a/mhcflurry/testing_utils.py
+++ b/mhcflurry/testing_utils.py
+"""
+Utilities used in MHCflurry unit tests.
+"""
+from . import Class1NeuralNetwork
+from .common import set_keras_backend
+
+
+def startup():
+    """
+    Configure Keras backend for running unit tests.
+    """
+    set_keras_backend("tensorflow-cpu", num_threads=2)
+
+
+def cleanup():
+    """
+    Clear tensorflow session and other process-wide resources.
+    """
+    import keras.backend as K
+    Class1NeuralNetwork.clear_model_cache()
+    K.clear_session()
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
--- a/mhcflurry/version.py
+++ b/mhcflurry/version.py
-__version__ = "1.2.4"
+__version__ = "1.3.0"
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ scikit-learn
 mhcnames
 pyyaml
 tqdm
+np_utils
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -58,11 +58,8 @@ if __name__ == '__main__':
        'mhcnames',
        'pyyaml',
        'tqdm',
+        'np_utils',
    ]
-    if PY2:
-        # concurrent.futures is a standard library in Py3 but Py2
-        # requires this backport
-        required_packages.append('futures')

    setup(
        name='mhcflurry',
@@ -78,14 +75,20 @@ if __name__ == '__main__':
                'mhcflurry-predict = mhcflurry.predict_command:run',
                'mhcflurry-class1-train-allele-specific-models = '
                    'mhcflurry.train_allele_specific_models_command:run',
+                'mhcflurry-class1-train-pan-allele-models = '
+                    'mhcflurry.train_pan_allele_models_command:run',
                'mhcflurry-class1-select-allele-specific-models = '
                    'mhcflurry.select_allele_specific_models_command:run',
+                'mhcflurry-class1-select-pan-allele-models = '
+                    'mhcflurry.select_pan_allele_models_command:run',
                'mhcflurry-calibrate-percentile-ranks = '
                    'mhcflurry.calibrate_percentile_ranks_command:run',
+                '_mhcflurry-cluster-worker-entry-point = '
+                    'mhcflurry.cluster_parallelism:worker_entry_point',
            ]
        },
        classifiers=[
-            'Development Status :: 4 - Beta',
+            'Development Status :: 5 - Production/Stable',
            'Environment :: Console',
            'Operating System :: OS Independent',
            'Intended Audience :: Science/Research',

--- a/test/data/hpv_predictions.csv
+++ b/test/data/hpv_predictions.csv
--- a/test/expensive_verify_pretrain_optimizable.py
+++ b/test/expensive_verify_pretrain_optimizable.py
--- a/test/test_allele_encoding.py
+++ b/test/test_allele_encoding.py
--- a/test/test_calibrate_percentile_ranks_command.py
+++ b/test/test_calibrate_percentile_ranks_command.py
--- a/test/test_changing_allele_representations.py
+++ b/test/test_changing_allele_representations.py
--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
--- a/test/test_class1_neural_network.py
+++ b/test/test_class1_neural_network.py
--- a/test/test_class1_pan.py
+++ b/test/test_class1_pan.py