Commit 7f1f671d authored by Tim O'Donnell

pylint nits

parent 18d451d2

Showing 89 additions and 77 deletions
"""
Class I MHC ligand prediction package
"""
from .class1_affinity_predictor import Class1AffinityPredictor
from .class1_neural_network import Class1NeuralNetwork
from .version import __version__
@@ -138,4 +138,3 @@ class AlleleEncoding(object):
result = vector_encoded[self.indices]
self.encoding_cache[cache_key] = result
return self.encoding_cache[cache_key]
@@ -212,7 +212,7 @@ def run(argv=sys.argv[1:]):
worker_pool.join()
print("Percent rank calibration time: %0.2f min." % (
percent_rank_calibration_time / 60.0))
print("Predictor written to: %s" % args.models_dir)
@@ -9,12 +9,13 @@ from os import mkdir, environ
from socket import gethostname
from getpass import getuser
from functools import partial
-from six import string_types
-import mhcnames
import numpy
-import pandas
from numpy.testing import assert_equal
+from six import string_types
+import pandas
+import mhcnames
from .class1_neural_network import Class1NeuralNetwork
from .common import random_peptides, positional_frequency_matrix
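The reshuffled imports above reflect pylint's wrong-import-order convention (C0411): standard-library modules first, third-party packages next, local package imports last. A minimal sketch of the expected grouping (the module names are illustrative, not this file's full import list):

    # Standard library imports come first.
    import logging
    from functools import partial

    # Third-party packages come next.
    import numpy
    import pandas
    from six import string_types

    # Local (first-party) imports come last, e.g.:
    # from .class1_neural_network import Class1NeuralNetwork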
@@ -57,11 +58,11 @@ class Class1AffinityPredictor(object):
Parameters
----------
allele_to_allele_specific_models : dict of string -> list of `Class1NeuralNetwork`
Ensemble of single-allele models to use for each allele.
class1_pan_allele_models : list of `Class1NeuralNetwork`
Ensemble of pan-allele models.
allele_to_sequence : dict of string -> string
MHC allele name to fixed-length amino acid sequence (sometimes
referred to as the pseudosequence). Required only if
@@ -106,7 +107,7 @@ class Class1AffinityPredictor(object):
self._cache = {}
self.optimization_info = {}
-assert isinstance( self.allele_to_allele_specific_models, dict)
+assert isinstance(self.allele_to_allele_specific_models, dict)
assert isinstance(self.class1_pan_allele_models, list)
@property
@@ -365,14 +366,14 @@ class Class1AffinityPredictor(object):
weights_path = self.weights_path(models_dir, row.model_name)
Class1AffinityPredictor.save_weights(
row.model.get_weights(), weights_path)
logging.info("Wrote: %s" % weights_path)
logging.info("Wrote: %s", weights_path)
write_manifest_df = self.manifest_df[[
c for c in self.manifest_df.columns if c != "model"
]]
manifest_path = join(models_dir, "manifest.csv")
write_manifest_df.to_csv(manifest_path, index=False)
logging.info("Wrote: %s" % manifest_path)
logging.info("Wrote: %s", manifest_path)
if write_metadata:
# Write "info.txt"
@@ -399,7 +400,7 @@ class Class1AffinityPredictor(object):
)
allele_to_sequence_df.to_csv(
join(models_dir, "allele_sequences.csv"), index=False)
logging.info("Wrote: %s" % join(models_dir, "allele_sequences.csv"))
logging.info("Wrote: %s", join(models_dir, "allele_sequences.csv"))
if self.allele_to_percent_rank_transform:
percent_ranks_df = None
@@ -414,7 +415,7 @@ class Class1AffinityPredictor(object):
percent_ranks_path,
index=True,
index_label="bin")
logging.info("Wrote: %s" % percent_ranks_path)
logging.info("Wrote: %s", percent_ranks_path)
@staticmethod
def load(models_dir=None, max_models=None):
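The logging changes in the hunks above ("Wrote: %s" % path becoming "Wrote: %s", path) address pylint's logging-not-lazy warning (W1201). Handing the arguments to the logging call defers string interpolation until a handler actually emits the record, so the formatting work is skipped entirely when the log level is disabled. A minimal sketch of the two forms (the path value is illustrative):

    import logging

    path = "/tmp/manifest.csv"  # illustrative value, not from the diff

    # Eager: the message string is built even if INFO is not enabled.
    logging.info("Wrote: %s" % path)

    # Lazy: logging interpolates only when the record is emitted.
    logging.info("Wrote: %s", path)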
@@ -467,7 +468,7 @@ class Class1AffinityPredictor(object):
if exists(join(models_dir, "allele_sequences.csv")):
allele_to_sequence = pandas.read_csv(
join(models_dir, "allele_sequences.csv"),
-index_col=0).iloc[:,0].to_dict()
+index_col=0).iloc[:, 0].to_dict()
allele_to_percent_rank_transform = {}
percent_ranks_path = join(models_dir, "percent_ranks.csv")
@@ -479,15 +480,15 @@ class Class1AffinityPredictor(object):
logging.info(
"Loaded %d class1 pan allele predictors, %d allele sequences, "
"%d percent rank distributions, and %d allele specific models: %s" % (
len(class1_pan_allele_models),
len(allele_to_sequence) if allele_to_sequence else 0,
len(allele_to_percent_rank_transform),
sum(len(v) for v in allele_to_allele_specific_models.values()),
", ".join(
"%s (%d)" % (allele, len(v))
for (allele, v)
in sorted(allele_to_allele_specific_models.items()))))
"%d percent rank distributions, and %d allele specific models: %s",
len(class1_pan_allele_models),
len(allele_to_sequence) if allele_to_sequence else 0,
len(allele_to_percent_rank_transform),
sum(len(v) for v in allele_to_allele_specific_models.values()),
", ".join(
"%s (%d)" % (allele, len(v))
for (allele, v)
in sorted(allele_to_allele_specific_models.items())))
result = Class1AffinityPredictor(
allele_to_allele_specific_models=allele_to_allele_specific_models,
@@ -500,7 +501,7 @@ class Class1AffinityPredictor(object):
logging.info("Optimizing models")
optimized = result.optimize()
logging.info(
"Optimization " + ("succeeded" if optimized else "failed"))
"Optimization %s", ("succeeded" if optimized else "failed"))
return result
def optimize(self):
@@ -527,7 +528,7 @@ class Class1AffinityPredictor(object):
merge_method="concatenate")
]
except NotImplementedError as e:
logging.warning("Optimization failed: %s" % str(e))
logging.warning("Optimization failed: %s", str(e))
return False
self._manifest_df = None
self.clear_cache()
@@ -584,8 +585,8 @@ class Class1AffinityPredictor(object):
AlleleEncoding
"""
if (self._master_allele_encoding is None or
self._master_allele_encoding.allele_to_sequence !=
self.allele_to_sequence):
self._master_allele_encoding = AlleleEncoding(
allele_to_sequence=self.allele_to_sequence)
return self._master_allele_encoding
@@ -793,7 +794,7 @@ class Class1AffinityPredictor(object):
encodable_peptides = EncodableSequences.create(peptides)
models = []
for i in range(n_models):
logging.info("Training model %d / %d" % (i + 1, n_models))
logging.info("Training model %d / %d", i + 1, n_models)
model = Class1NeuralNetwork(**architecture_hyperparameters)
model.fit(
encodable_peptides,
@@ -879,10 +880,8 @@ class Class1AffinityPredictor(object):
msg = "Allele %s has no percentile rank information" % allele
if throw:
raise ValueError(msg)
-else:
-warnings.warn(msg)
-# Return NaNs
-return numpy.ones(len(affinities)) * numpy.nan
+warnings.warn(msg)
+return numpy.ones(len(affinities)) * numpy.nan # Return NaNs
if alleles is None:
raise ValueError("Specify allele or alleles")
@@ -1294,10 +1293,10 @@ class Class1AffinityPredictor(object):
Returns
----------
-None if motif_summary is False
dict of string -> pandas.DataFrame
-Otherwise: dict of string -> pandas.DataFrame where keys are
-"frequency_matrices" and "length_distributions".
+If motif_summary is True, this will have keys "frequency_matrices" and
+"length_distributions". Otherwise it will be empty.
"""
if bins is None:
@@ -1323,7 +1322,7 @@ class Class1AffinityPredictor(object):
else:
frequency_matrices = None
length_distributions = None
-for (i, allele) in enumerate(alleles):
+for allele in alleles:
start = time.time()
predictions = self.predict(
encoded_peptides, allele=allele, model_kwargs=model_kwargs)
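Rewriting for (i, allele) in enumerate(alleles) as for allele in alleles removes a loop index that was never read, which pylint reports as unused-variable (W0612). A minimal sketch (the allele names are illustrative values):

    alleles = ["HLA-A*02:01", "HLA-B*07:02"]  # illustrative values

    # Before: i is bound on every iteration but never used.
    for i, allele in enumerate(alleles):
        print(allele)

    # After: iterate over the list directly.
    for allele in alleles:
        print(allele)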
@@ -1400,6 +1399,7 @@ class Class1AffinityPredictor(object):
'frequency_matrices': frequency_matrices,
'length_distributions': length_distributions,
}
+return {}
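The added return {} matches the updated docstring above: pylint's inconsistent-return-statements check (R1710) flags functions that return a value on some paths but fall off the end (an implicit return None) on others. A minimal sketch of the shape this enforces, with hypothetical names:

    def calibrate(motif_summary=False):
        if motif_summary:
            return {"frequency_matrices": None, "length_distributions": None}
        return {}  # explicit empty result instead of an implicit None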
def model_select(
self,
@@ -1490,4 +1490,3 @@ class Class1AffinityPredictor(object):
"model_selection": df,
})
return new_predictor
@@ -349,4 +349,3 @@ def worker_entry_point(argv=sys.argv[1:]):
if args.complete_dir:
os.mkdir(args.complete_dir)
print("Created: ", args.complete_dir)
@@ -173,4 +173,4 @@ def positional_frequency_matrix(peptides):
counts[i + 1] = pandas.Series([p[i] for p in peptides]).value_counts()
result = (counts / len(peptides)).fillna(0.0).T
result.index.name = 'position'
-return result
\ No newline at end of file
+return result
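The repeated return result line, with \ No newline at end of file between the two copies, is standard diff notation rather than a real code change: the old file ended without a trailing newline (pylint's missing-final-newline, C0304) and the new file adds one, leaving the text of the line itself untouched. The same pattern appears twice more below.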
@@ -251,4 +251,4 @@ def check_shape(name, arr, expected_shape):
# Register custom losses.
for cls in [MSEWithInequalities, MSEWithInequalitiesAndMultipleOutputs]:
-CUSTOM_LOSSES[cls.name] = cls()
\ No newline at end of file
+CUSTOM_LOSSES[cls.name] = cls()
@@ -9,9 +9,9 @@ from __future__ import (
)
import logging
import yaml
-from os.path import join, exists, relpath
-from pipes import quote
+from os.path import join, exists
from os import environ
+from pipes import quote
from collections import OrderedDict
from appdirs import user_data_dir
from pkg_resources import resource_string
@@ -81,8 +81,7 @@ def get_default_class1_models_dir(test_exists=True):
if test_exists and not exists(result):
raise IOError("No such directory: %s" % result)
return result
-else:
-return get_path("models_class1", "models", test_exists=test_exists)
+return get_path("models_class1", "models", test_exists=test_exists)
def get_current_release_downloads():
@@ -160,13 +159,13 @@ def configure():
metadata["releases"][_CURRENT_RELEASE]["compatibility-version"])
current_compatability = metadata["current-compatibility-version"]
if current_release_compatability != current_compatability:
-logging.warn(
+logging.warning(
"The specified downloads are not compatible with this version "
"of the MHCflurry codebase. Downloads: release %s, "
"compatability version: %d. Code compatability version: %d" % (
_CURRENT_RELEASE,
current_release_compatability,
current_compatability))
"compatability version: %d. Code compatability version: %d",
_CURRENT_RELEASE,
current_release_compatability,
current_compatability)
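logging.warn is an undocumented, deprecated alias for logging.warning, which pylint reports via its deprecated-method check (W1505); the rename here, together with moving the %s/%d arguments out of the format expression, brings this call in line with the lazy-logging fixes elsewhere in the commit.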
data_dir = environ.get("MHCFLURRY_DATA_DIR")
if not data_dir:
@@ -176,6 +175,7 @@ def configure():
data_dir = user_data_dir("mhcflurry", version="4")
_DOWNLOADS_DIR = join(data_dir, _CURRENT_RELEASE)
logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s" % _DOWNLOADS_DIR)
logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s", _DOWNLOADS_DIR)
configure()
@@ -168,8 +168,7 @@ def fetch_subcommand(args):
"\nThe requested download '%s' has already been downloaded. "
"To re-download this data, first run: \n\t%s\nin a shell "
"and then re-run this command.\n" +
"*" * 40)
% (name, 'rm -rf ' + quote(get_path(name))))
"*" * 40) % (name, 'rm -rf ' + quote(get_path(name))))
if not info['downloaded'] and (name in args.download_name or default):
items_to_fetch.add(name)
"""
Class for encoding variable-length peptides to fixed-size numerical matrices
"""
from __future__ import (
print_function,
division,
@@ -26,9 +29,12 @@ class EncodingError(ValueError):
class EncodableSequences(object):
"""
Sequences of amino acids.
Class for encoding variable-length peptides to fixed-size numerical matrices
This class caches various encodings of a list of sequences.
+In practice this is used only for peptides. To encode MHC allele sequences,
+see AlleleEncoding.
"""
unknown_character = "X"
@@ -299,8 +305,10 @@ class EncodableSequences(object):
min_length = 5
# Result array is int32, filled with X (null amino acid) value.
-result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-shape=(len(sequences), max_length * 2), dtype="int32")
+result = numpy.full(
+fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+shape=(len(sequences), max_length * 2),
+dtype="int32")
df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
@@ -319,9 +327,9 @@ class EncodableSequences(object):
# Array of shape (num peptides, length) giving fixed-length
# amino acid encoding each peptide of the current length.
fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-lambda s: numpy.array(
-[amino_acid.AMINO_ACID_INDEX[char] for char in
-s])).values)
+lambda s: numpy.array([
+amino_acid.AMINO_ACID_INDEX[char] for char in s
+])).values)
# Set left edge
result[sub_df.index, :length] = fixed_length_sequences
@@ -334,8 +342,10 @@ class EncodableSequences(object):
min_length = 5
# Result array is int32, filled with X (null amino acid) value.
-result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-shape=(len(sequences), max_length * 3), dtype="int32")
+result = numpy.full(
+fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+shape=(len(sequences), max_length * 3),
+dtype="int32")
df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
@@ -354,9 +364,9 @@ class EncodableSequences(object):
# Array of shape (num peptides, length) giving fixed-length
# amino acid encoding each peptide of the current length.
fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-lambda s: numpy.array(
-[amino_acid.AMINO_ACID_INDEX[char] for char in
-s])).values)
+lambda s: numpy.array([
+amino_acid.AMINO_ACID_INDEX[char] for char in s
+])).values)
# Set left edge
result[sub_df.index, :length] = fixed_length_sequences
@@ -37,4 +37,4 @@ CENTRALITY_MEASURES = {
"mean": partial(numpy.nanmean, axis=1),
"median": partial(numpy.nanmedian, axis=1),
"robust_mean": robust_mean,
-}
\ No newline at end of file
+}
"""
Hyperparameter (neural network options) management
"""
from __future__ import (
print_function,
division,
@@ -70,8 +73,7 @@ class HyperparameterDefaults(object):
if invalid_keys:
raise ValueError(
"No such model parameters: %s. Valid parameters are: %s"
% (" ".join(invalid_keys),
" ".join(self.defaults)))
% (" ".join(invalid_keys), " ".join(self.defaults)))
def models_grid(self, **kwargs):
'''
"""
Class for transforming arbitrary values into percent ranks given a distribution.
"""
import numpy
import pandas
@@ -77,8 +80,3 @@ class PercentRankTransform(object):
result.cdf = series.values
result.bin_edges = series.index.values[1:-1]
return result
@@ -219,7 +219,7 @@ def run(argv=sys.argv[1:]):
})
logging.info(
"Predicting for %d alleles and %d peptides = %d predictions" % (
len(args.alleles), len(args.peptides), len(df)))
predictions = predictor.predict_to_dataframe(
peptides=df[args.peptide_column].values,
"""
Measures of prediction accuracy
"""
from __future__ import (
print_function,
division,
@@ -24,7 +24,7 @@ tqdm.monitor_interval = 0 # see https://github.com/tqdm/tqdm/issues/481
from .class1_affinity_predictor import Class1AffinityPredictor
from .encodable_sequences import EncodableSequences
from .allele_encoding import AlleleEncoding
-from .common import configure_logging, random_peptides
+from .common import configure_logging
from .local_parallelism import (
worker_pool_with_gpu_assignments_from_args,
add_local_parallelism_args)
@@ -10,7 +10,6 @@ import traceback
import random
from functools import partial
import numpy
-import pandas
import yaml
from sklearn.metrics.pairwise import cosine_similarity
@@ -337,7 +336,7 @@ def alleles_by_similarity(allele):
allele_similarity.columns.to_series().sample(frac=1.0))
return (
allele_similarity[allele] + (
-allele_similarity.index == allele) # force that we return specified allele first
+allele_similarity.index == allele) # force specified allele first
).sort_values(ascending=False).index.tolist()
@@ -270,7 +270,8 @@ def run(argv=sys.argv[1:]):
return main(args)
except Exception as e:
print(e)
-import ipdb ; ipdb.set_trace()
+import ipdb
+ipdb.set_trace()
raise
else:
return main(args)
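Splitting import ipdb ; ipdb.set_trace() across two lines clears pylint's multiple-statements check (C0321), which flags more than one statement on a single line. A minimal sketch with harmless statements in place of the debugger hook:

    x = 1; y = 2  # flagged: two statements share one line (C0321)

    x = 1  # preferred: one statement per line
    y = 2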
@@ -697,4 +698,3 @@ def train_model(
if __name__ == '__main__':
run()