From 7f1f671df4e11bba7938934d749a9ab6b2d90e56 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 5 Sep 2019 18:15:56 -0400
Subject: [PATCH] pylint nits

---
 mhcflurry/__init__.py                         |  4 ++
 mhcflurry/allele_encoding.py                  |  1 -
 .../calibrate_percentile_ranks_command.py     |  2 +-
 mhcflurry/class1_affinity_predictor.py        | 69 +++++++++----------
 mhcflurry/cluster_parallelism.py              |  1 -
 mhcflurry/common.py                           |  2 +-
 mhcflurry/custom_loss.py                      |  2 +-
 mhcflurry/downloads.py                        | 20 +++---
 mhcflurry/downloads_command.py                |  3 +-
 mhcflurry/encodable_sequences.py              | 32 ++++++---
 mhcflurry/ensemble_centrality.py              |  2 +-
 mhcflurry/hyperparameters.py                  |  6 +-
 mhcflurry/percent_rank_transform.py           |  8 +--
 mhcflurry/predict_command.py                  |  2 +-
 mhcflurry/scoring.py                          |  3 +
 mhcflurry/select_pan_allele_models_command.py |  2 +-
 .../train_allele_specific_models_command.py   |  3 +-
 mhcflurry/train_pan_allele_models_command.py  |  4 +-
 18 files changed, 89 insertions(+), 77 deletions(-)

diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index 8538c4d3..5d7ceb0f 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -1,3 +1,7 @@
+"""
+Class I MHC ligand prediction package
+"""
+
 from .class1_affinity_predictor import Class1AffinityPredictor
 from .class1_neural_network import Class1NeuralNetwork
 from .version import __version__
diff --git a/mhcflurry/allele_encoding.py b/mhcflurry/allele_encoding.py
index 2e89bfaf..06355361 100644
--- a/mhcflurry/allele_encoding.py
+++ b/mhcflurry/allele_encoding.py
@@ -138,4 +138,3 @@ class AlleleEncoding(object):
             result = vector_encoded[self.indices]
             self.encoding_cache[cache_key] = result
         return self.encoding_cache[cache_key]
-
diff --git a/mhcflurry/calibrate_percentile_ranks_command.py b/mhcflurry/calibrate_percentile_ranks_command.py
index 31aeccc1..9ced4161 100644
--- a/mhcflurry/calibrate_percentile_ranks_command.py
+++ b/mhcflurry/calibrate_percentile_ranks_command.py
@@ -212,7 +212,7 @@ def run(argv=sys.argv[1:]):
         worker_pool.join()
 
     print("Percent rank calibration time: %0.2f min." % (
-       percent_rank_calibration_time / 60.0))
+        percent_rank_calibration_time / 60.0))
     print("Predictor written to: %s" % args.models_dir)
 
 
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index eca34889..10ae6365 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -9,12 +9,13 @@ from os import mkdir, environ
 from socket import gethostname
 from getpass import getuser
 from functools import partial
+from six import string_types
 
-import mhcnames
 import numpy
-import pandas
 from numpy.testing import assert_equal
-from six import string_types
+import pandas
+
+import mhcnames
 
 from .class1_neural_network import Class1NeuralNetwork
 from .common import random_peptides, positional_frequency_matrix
@@ -57,11 +58,11 @@ class Class1AffinityPredictor(object):
         Parameters
         ----------
         allele_to_allele_specific_models : dict of string -> list of `Class1NeuralNetwork`
-            Ensemble of single-allele models to use for each allele. 
-        
+            Ensemble of single-allele models to use for each allele.
+
         class1_pan_allele_models : list of `Class1NeuralNetwork`
             Ensemble of pan-allele models.
-        
+
         allele_to_sequence : dict of string -> string
             MHC allele name to fixed-length amino acid sequence (sometimes
             referred to as the pseudosequence). Required only if
@@ -106,7 +107,7 @@ class Class1AffinityPredictor(object):
         self._cache = {}
         self.optimization_info = {}
 
-        assert isinstance( self.allele_to_allele_specific_models, dict)
+        assert isinstance(self.allele_to_allele_specific_models, dict)
         assert isinstance(self.class1_pan_allele_models, list)
 
     @property
@@ -365,14 +366,14 @@ class Class1AffinityPredictor(object):
             weights_path = self.weights_path(models_dir, row.model_name)
             Class1AffinityPredictor.save_weights(
                 row.model.get_weights(), weights_path)
-            logging.info("Wrote: %s" % weights_path)
+            logging.info("Wrote: %s", weights_path)
 
         write_manifest_df = self.manifest_df[[
             c for c in self.manifest_df.columns if c != "model"
         ]]
         manifest_path = join(models_dir, "manifest.csv")
         write_manifest_df.to_csv(manifest_path, index=False)
-        logging.info("Wrote: %s" % manifest_path)
+        logging.info("Wrote: %s", manifest_path)
 
         if write_metadata:
             # Write "info.txt"
@@ -399,7 +400,7 @@ class Class1AffinityPredictor(object):
             )
             allele_to_sequence_df.to_csv(
                 join(models_dir, "allele_sequences.csv"), index=False)
-            logging.info("Wrote: %s" % join(models_dir, "allele_sequences.csv"))
+            logging.info("Wrote: %s", join(models_dir, "allele_sequences.csv"))
 
         if self.allele_to_percent_rank_transform:
             percent_ranks_df = None
@@ -414,7 +415,7 @@ class Class1AffinityPredictor(object):
                 percent_ranks_path,
                 index=True,
                 index_label="bin")
-            logging.info("Wrote: %s" % percent_ranks_path)
+            logging.info("Wrote: %s", percent_ranks_path)
 
     @staticmethod
     def load(models_dir=None, max_models=None):
@@ -467,7 +468,7 @@ class Class1AffinityPredictor(object):
         if exists(join(models_dir, "allele_sequences.csv")):
             allele_to_sequence = pandas.read_csv(
                 join(models_dir, "allele_sequences.csv"),
-                index_col=0).iloc[:,0].to_dict()
+                index_col=0).iloc[:, 0].to_dict()
 
         allele_to_percent_rank_transform = {}
         percent_ranks_path = join(models_dir, "percent_ranks.csv")
@@ -479,15 +480,15 @@ class Class1AffinityPredictor(object):
 
         logging.info(
             "Loaded %d class1 pan allele predictors, %d allele sequences, "
-            "%d percent rank distributions, and %d allele specific models: %s" % (
-                len(class1_pan_allele_models),
-                len(allele_to_sequence) if allele_to_sequence else 0,
-                len(allele_to_percent_rank_transform),
-                sum(len(v) for v in allele_to_allele_specific_models.values()),
-                ", ".join(
-                    "%s (%d)" % (allele, len(v))
-                    for (allele, v)
-                    in sorted(allele_to_allele_specific_models.items()))))
+            "%d percent rank distributions, and %d allele specific models: %s",
+            len(class1_pan_allele_models),
+            len(allele_to_sequence) if allele_to_sequence else 0,
+            len(allele_to_percent_rank_transform),
+            sum(len(v) for v in allele_to_allele_specific_models.values()),
+            ", ".join(
+                "%s (%d)" % (allele, len(v))
+                for (allele, v)
+                in sorted(allele_to_allele_specific_models.items())))
 
         result = Class1AffinityPredictor(
             allele_to_allele_specific_models=allele_to_allele_specific_models,
@@ -500,7 +501,7 @@ class Class1AffinityPredictor(object):
             logging.info("Optimizing models")
             optimized = result.optimize()
             logging.info(
-                "Optimization " + ("succeeded" if optimized else "failed"))
+                "Optimization %s", ("succeeded" if optimized else "failed"))
         return result
 
     def optimize(self):
@@ -527,7 +528,7 @@ class Class1AffinityPredictor(object):
                         merge_method="concatenate")
                 ]
             except NotImplementedError as e:
-                logging.warning("Optimization failed: %s" % str(e))
+                logging.warning("Optimization failed: %s", str(e))
                 return False
             self._manifest_df = None
             self.clear_cache()
@@ -584,8 +585,8 @@ class Class1AffinityPredictor(object):
         AlleleEncoding
         """
         if (self._master_allele_encoding is None or
-                    self._master_allele_encoding.allele_to_sequence !=
-                    self.allele_to_sequence):
+                self._master_allele_encoding.allele_to_sequence !=
+                self.allele_to_sequence):
             self._master_allele_encoding = AlleleEncoding(
                 allele_to_sequence=self.allele_to_sequence)
         return self._master_allele_encoding
@@ -793,7 +794,7 @@ class Class1AffinityPredictor(object):
         encodable_peptides = EncodableSequences.create(peptides)
         models = []
         for i in range(n_models):
-            logging.info("Training model %d / %d" % (i + 1, n_models))
+            logging.info("Training model %d / %d", i + 1, n_models)
             model = Class1NeuralNetwork(**architecture_hyperparameters)
             model.fit(
                 encodable_peptides,
@@ -879,10 +880,8 @@ class Class1AffinityPredictor(object):
                 msg = "Allele %s has no percentile rank information" % allele
                 if throw:
                     raise ValueError(msg)
-                else:
-                    warnings.warn(msg)
-                    # Return NaNs
-                    return numpy.ones(len(affinities)) * numpy.nan
+                warnings.warn(msg)
+                return numpy.ones(len(affinities)) * numpy.nan  # Return NaNs
 
         if alleles is None:
             raise ValueError("Specify allele or alleles")
@@ -1294,10 +1293,10 @@ class Class1AffinityPredictor(object):
 
         Returns
         ----------
-        None if motif_summary is False
+        dict of string -> pandas.DataFrame
 
-        Otherwise: dict of string -> pandas.DataFrame where keys are
-        "frequency_matrices" and "length_distributions".
+        If motif_summary is True, this will have keys "frequency_matrices" and
+        "length_distributions". Otherwise it will be empty.
 
         """
         if bins is None:
@@ -1323,7 +1322,7 @@ class Class1AffinityPredictor(object):
         else:
             frequency_matrices = None
             length_distributions = None
-        for (i, allele) in enumerate(alleles):
+        for allele in alleles:
             start = time.time()
             predictions = self.predict(
                 encoded_peptides, allele=allele, model_kwargs=model_kwargs)
@@ -1400,6 +1399,7 @@ class Class1AffinityPredictor(object):
                 'frequency_matrices': frequency_matrices,
                 'length_distributions': length_distributions,
             }
+        return {}
 
     def model_select(
             self,
@@ -1490,4 +1490,3 @@ class Class1AffinityPredictor(object):
                 "model_selection": df,
             })
         return new_predictor
-
diff --git a/mhcflurry/cluster_parallelism.py b/mhcflurry/cluster_parallelism.py
index 976f53a2..9ec07de9 100644
--- a/mhcflurry/cluster_parallelism.py
+++ b/mhcflurry/cluster_parallelism.py
@@ -349,4 +349,3 @@ def worker_entry_point(argv=sys.argv[1:]):
         if args.complete_dir:
             os.mkdir(args.complete_dir)
             print("Created: ", args.complete_dir)
-
diff --git a/mhcflurry/common.py b/mhcflurry/common.py
index 7c8e1628..8885637b 100644
--- a/mhcflurry/common.py
+++ b/mhcflurry/common.py
@@ -173,4 +173,4 @@ def positional_frequency_matrix(peptides):
         counts[i + 1] = pandas.Series([p[i] for p in peptides]).value_counts()
     result = (counts / len(peptides)).fillna(0.0).T
     result.index.name = 'position'
-    return result
\ No newline at end of file
+    return result
diff --git a/mhcflurry/custom_loss.py b/mhcflurry/custom_loss.py
index 47d51cb4..8c523c0a 100644
--- a/mhcflurry/custom_loss.py
+++ b/mhcflurry/custom_loss.py
@@ -251,4 +251,4 @@ def check_shape(name, arr, expected_shape):
 
 # Register custom losses.
 for cls in [MSEWithInequalities, MSEWithInequalitiesAndMultipleOutputs]:
-    CUSTOM_LOSSES[cls.name] = cls()
\ No newline at end of file
+    CUSTOM_LOSSES[cls.name] = cls()
diff --git a/mhcflurry/downloads.py b/mhcflurry/downloads.py
index ec776f9e..7f29ea52 100644
--- a/mhcflurry/downloads.py
+++ b/mhcflurry/downloads.py
@@ -9,9 +9,9 @@ from __future__ import (
 )
 import logging
 import yaml
-from os.path import join, exists, relpath
-from pipes import quote
+from os.path import join, exists
 from os import environ
+from pipes import quote
 from collections import OrderedDict
 from appdirs import user_data_dir
 from pkg_resources import resource_string
@@ -81,8 +81,7 @@ def get_default_class1_models_dir(test_exists=True):
         if test_exists and not exists(result):
             raise IOError("No such directory: %s" % result)
         return result
-    else:
-        return get_path("models_class1", "models", test_exists=test_exists)
+    return get_path("models_class1", "models", test_exists=test_exists)
 
 
 def get_current_release_downloads():
@@ -160,13 +159,13 @@ def configure():
             metadata["releases"][_CURRENT_RELEASE]["compatibility-version"])
         current_compatability = metadata["current-compatibility-version"]
         if current_release_compatability != current_compatability:
-            logging.warn(
+            logging.warning(
                 "The specified downloads are not compatible with this version "
                 "of the MHCflurry codebase. Downloads: release %s, "
-                "compatability version: %d. Code compatability version: %d" % (
-                    _CURRENT_RELEASE,
-                    current_release_compatability,
-                    current_compatability))
+                "compatability version: %d. Code compatability version: %d",
+                _CURRENT_RELEASE,
+                current_release_compatability,
+                current_compatability)
 
         data_dir = environ.get("MHCFLURRY_DATA_DIR")
         if not data_dir:
@@ -176,6 +175,7 @@ def configure():
             data_dir = user_data_dir("mhcflurry", version="4")
         _DOWNLOADS_DIR = join(data_dir, _CURRENT_RELEASE)
 
-    logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s" % _DOWNLOADS_DIR)
+    logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s", _DOWNLOADS_DIR)
+
 
 configure()
diff --git a/mhcflurry/downloads_command.py b/mhcflurry/downloads_command.py
index 8bf1d213..249269ae 100644
--- a/mhcflurry/downloads_command.py
+++ b/mhcflurry/downloads_command.py
@@ -168,8 +168,7 @@ def fetch_subcommand(args):
                 "\nThe requested download '%s' has already been downloaded. "
                 "To re-download this data, first run: \n\t%s\nin a shell "
                 "and then re-run this command.\n" +
-                "*" * 40)
-                % (name, 'rm -rf ' + quote(get_path(name))))
+                "*" * 40) % (name, 'rm -rf ' + quote(get_path(name))))
         if not info['downloaded'] and (name in args.download_name or default):
             items_to_fetch.add(name)
 
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index f6322835..19696e23 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -1,3 +1,6 @@
+"""
+Class for encoding variable-length peptides to fixed-size numerical matrices
+"""
 from __future__ import (
     print_function,
     division,
@@ -26,9 +29,12 @@ class EncodingError(ValueError):
 
 class EncodableSequences(object):
     """
-    Sequences of amino acids.
+    Class for encoding variable-length peptides to fixed-size numerical matrices
     
     This class caches various encodings of a list of sequences.
+
+    In practice this is used only for peptides. To encode MHC allele sequences,
+    see AlleleEncoding.
     """
     unknown_character = "X"
 
@@ -299,8 +305,10 @@ class EncodableSequences(object):
             min_length = 5
 
             # Result array is int32, filled with X (null amino acid) value.
-            result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-                shape=(len(sequences), max_length * 2), dtype="int32")
+            result = numpy.full(
+                fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+                shape=(len(sequences), max_length * 2),
+                dtype="int32")
 
             df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
 
@@ -319,9 +327,9 @@ class EncodableSequences(object):
                 # Array of shape (num peptides, length) giving fixed-length
                 # amino acid encoding each peptide of the current length.
                 fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-                    lambda s: numpy.array(
-                        [amino_acid.AMINO_ACID_INDEX[char] for char in
-                            s])).values)
+                    lambda s: numpy.array([
+                        amino_acid.AMINO_ACID_INDEX[char] for char in s
+                    ])).values)
 
                 # Set left edge
                 result[sub_df.index, :length] = fixed_length_sequences
@@ -334,8 +342,10 @@ class EncodableSequences(object):
             min_length = 5
 
             # Result array is int32, filled with X (null amino acid) value.
-            result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-                shape=(len(sequences), max_length * 3), dtype="int32")
+            result = numpy.full(
+                fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+                shape=(len(sequences), max_length * 3),
+                dtype="int32")
 
             df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
 
@@ -354,9 +364,9 @@ class EncodableSequences(object):
                 # Array of shape (num peptides, length) giving fixed-length
                 # amino acid encoding each peptide of the current length.
                 fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-                    lambda s: numpy.array(
-                        [amino_acid.AMINO_ACID_INDEX[char] for char in
-                            s])).values)
+                    lambda s: numpy.array([
+                        amino_acid.AMINO_ACID_INDEX[char] for char in s
+                    ])).values)
 
                 # Set left edge
                 result[sub_df.index, :length] = fixed_length_sequences
diff --git a/mhcflurry/ensemble_centrality.py b/mhcflurry/ensemble_centrality.py
index e370a39d..07251bf0 100644
--- a/mhcflurry/ensemble_centrality.py
+++ b/mhcflurry/ensemble_centrality.py
@@ -37,4 +37,4 @@ CENTRALITY_MEASURES = {
     "mean": partial(numpy.nanmean, axis=1),
     "median": partial(numpy.nanmedian, axis=1),
     "robust_mean": robust_mean,
-}
\ No newline at end of file
+}
diff --git a/mhcflurry/hyperparameters.py b/mhcflurry/hyperparameters.py
index cc5950d5..1241fa46 100644
--- a/mhcflurry/hyperparameters.py
+++ b/mhcflurry/hyperparameters.py
@@ -1,3 +1,6 @@
+"""
+Hyperparameter (neural network options) management
+"""
 from __future__ import (
     print_function,
     division,
@@ -70,8 +73,7 @@ class HyperparameterDefaults(object):
         if invalid_keys:
             raise ValueError(
                 "No such model parameters: %s. Valid parameters are: %s"
-                % (" ".join(invalid_keys),
-                    " ".join(self.defaults)))
+                % (" ".join(invalid_keys), " ".join(self.defaults)))
 
     def models_grid(self, **kwargs):
         '''
diff --git a/mhcflurry/percent_rank_transform.py b/mhcflurry/percent_rank_transform.py
index a9098bc2..a4597686 100644
--- a/mhcflurry/percent_rank_transform.py
+++ b/mhcflurry/percent_rank_transform.py
@@ -1,3 +1,6 @@
+"""
+Class for transforming arbitrary values into percent ranks given a distribution.
+"""
 import numpy
 import pandas
 
@@ -77,8 +80,3 @@ class PercentRankTransform(object):
         result.cdf = series.values
         result.bin_edges = series.index.values[1:-1]
         return result
-
-
-
-
-
diff --git a/mhcflurry/predict_command.py b/mhcflurry/predict_command.py
index dea1ddf4..17a687f5 100644
--- a/mhcflurry/predict_command.py
+++ b/mhcflurry/predict_command.py
@@ -219,7 +219,7 @@ def run(argv=sys.argv[1:]):
         })
         logging.info(
             "Predicting for %d alleles and %d peptides = %d predictions" % (
-            len(args.alleles), len(args.peptides), len(df)))
+                len(args.alleles), len(args.peptides), len(df)))
 
     predictions = predictor.predict_to_dataframe(
         peptides=df[args.peptide_column].values,
diff --git a/mhcflurry/scoring.py b/mhcflurry/scoring.py
index f6a256ba..d0d41d4e 100644
--- a/mhcflurry/scoring.py
+++ b/mhcflurry/scoring.py
@@ -1,3 +1,6 @@
+"""
+Measures of prediction accuracy
+"""
 from __future__ import (
     print_function,
     division,
diff --git a/mhcflurry/select_pan_allele_models_command.py b/mhcflurry/select_pan_allele_models_command.py
index 0032f8fc..6e6f45fa 100644
--- a/mhcflurry/select_pan_allele_models_command.py
+++ b/mhcflurry/select_pan_allele_models_command.py
@@ -24,7 +24,7 @@ tqdm.monitor_interval = 0  # see https://github.com/tqdm/tqdm/issues/481
 from .class1_affinity_predictor import Class1AffinityPredictor
 from .encodable_sequences import EncodableSequences
 from .allele_encoding import AlleleEncoding
-from .common import configure_logging, random_peptides
+from .common import configure_logging
 from .local_parallelism import (
     worker_pool_with_gpu_assignments_from_args,
     add_local_parallelism_args)
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index fe295b67..ec1b8c8a 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -10,7 +10,6 @@ import traceback
 import random
 from functools import partial
 
-import numpy
 import pandas
 import yaml
 from sklearn.metrics.pairwise import cosine_similarity
@@ -337,7 +336,7 @@ def alleles_by_similarity(allele):
             allele_similarity.columns.to_series().sample(frac=1.0))
     return (
         allele_similarity[allele] + (
-        allele_similarity.index == allele)  # force that we return specified allele first
+            allele_similarity.index == allele)  # force specified allele first
     ).sort_values(ascending=False).index.tolist()
 
 
diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index d925f991..a676bb38 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -270,7 +270,8 @@ def run(argv=sys.argv[1:]):
             return main(args)
         except Exception as e:
             print(e)
-            import ipdb ; ipdb.set_trace()
+            import ipdb
+            ipdb.set_trace()
             raise
     else:
         return main(args)
@@ -697,4 +698,3 @@ def train_model(
 
 if __name__ == '__main__':
     run()
-
-- 
GitLab