diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index 8538c4d3a9afb4f3b3b07b4de7c38c85f765be0a..5d7ceb0fedefd35efb370b702acaf6df0bf799cc 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -1,3 +1,7 @@
+"""
+Class I MHC ligand prediction package
+"""
+
 from .class1_affinity_predictor import Class1AffinityPredictor
 from .class1_neural_network import Class1NeuralNetwork
 from .version import __version__
diff --git a/mhcflurry/allele_encoding.py b/mhcflurry/allele_encoding.py
index 2e89bfaf2482508d0715becf37328d3e9bc34d52..06355361dd337c5faaa5aa040932a62a733ece87 100644
--- a/mhcflurry/allele_encoding.py
+++ b/mhcflurry/allele_encoding.py
@@ -138,4 +138,3 @@ class AlleleEncoding(object):
             result = vector_encoded[self.indices]
             self.encoding_cache[cache_key] = result
         return self.encoding_cache[cache_key]
-
diff --git a/mhcflurry/calibrate_percentile_ranks_command.py b/mhcflurry/calibrate_percentile_ranks_command.py
index 31aeccc190c6f411c8fedb5d1610ab347f03c61c..9ced4161ea4617a132ec94f677e5c92743ba04fc 100644
--- a/mhcflurry/calibrate_percentile_ranks_command.py
+++ b/mhcflurry/calibrate_percentile_ranks_command.py
@@ -212,7 +212,7 @@ def run(argv=sys.argv[1:]):
         worker_pool.join()
 
     print("Percent rank calibration time: %0.2f min." % (
-       percent_rank_calibration_time / 60.0))
+        percent_rank_calibration_time / 60.0))
     print("Predictor written to: %s" % args.models_dir)
 
 
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index eca348898af74afd5780adc29ade0f929a1e39c6..10ae6365f29829b14fd2f9f7858027c4ca3bfaf3 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -9,12 +9,13 @@ from os import mkdir, environ
 from socket import gethostname
 from getpass import getuser
 from functools import partial
+from six import string_types
 
-import mhcnames
 import numpy
-import pandas
 from numpy.testing import assert_equal
-from six import string_types
+import pandas
+
+import mhcnames
 
 from .class1_neural_network import Class1NeuralNetwork
 from .common import random_peptides, positional_frequency_matrix
@@ -57,11 +58,11 @@ class Class1AffinityPredictor(object):
         Parameters
         ----------
         allele_to_allele_specific_models : dict of string -> list of `Class1NeuralNetwork`
-            Ensemble of single-allele models to use for each allele. 
-        
+            Ensemble of single-allele models to use for each allele.
+
         class1_pan_allele_models : list of `Class1NeuralNetwork`
             Ensemble of pan-allele models.
-        
+
         allele_to_sequence : dict of string -> string
             MHC allele name to fixed-length amino acid sequence (sometimes
             referred to as the pseudosequence). Required only if
@@ -106,7 +107,7 @@ class Class1AffinityPredictor(object):
         self._cache = {}
         self.optimization_info = {}
 
-        assert isinstance( self.allele_to_allele_specific_models, dict)
+        assert isinstance(self.allele_to_allele_specific_models, dict)
         assert isinstance(self.class1_pan_allele_models, list)
 
     @property
@@ -365,14 +366,14 @@ class Class1AffinityPredictor(object):
             weights_path = self.weights_path(models_dir, row.model_name)
             Class1AffinityPredictor.save_weights(
                 row.model.get_weights(), weights_path)
-            logging.info("Wrote: %s" % weights_path)
+            logging.info("Wrote: %s", weights_path)
 
         write_manifest_df = self.manifest_df[[
             c for c in self.manifest_df.columns if c != "model"
         ]]
         manifest_path = join(models_dir, "manifest.csv")
         write_manifest_df.to_csv(manifest_path, index=False)
-        logging.info("Wrote: %s" % manifest_path)
+        logging.info("Wrote: %s", manifest_path)
 
         if write_metadata:
             # Write "info.txt"
@@ -399,7 +400,7 @@ class Class1AffinityPredictor(object):
             )
             allele_to_sequence_df.to_csv(
                 join(models_dir, "allele_sequences.csv"), index=False)
-            logging.info("Wrote: %s" % join(models_dir, "allele_sequences.csv"))
+            logging.info("Wrote: %s", join(models_dir, "allele_sequences.csv"))
 
         if self.allele_to_percent_rank_transform:
             percent_ranks_df = None
@@ -414,7 +415,7 @@ class Class1AffinityPredictor(object):
                 percent_ranks_path,
                 index=True,
                 index_label="bin")
-            logging.info("Wrote: %s" % percent_ranks_path)
+            logging.info("Wrote: %s", percent_ranks_path)
 
     @staticmethod
     def load(models_dir=None, max_models=None):
@@ -467,7 +468,7 @@ class Class1AffinityPredictor(object):
         if exists(join(models_dir, "allele_sequences.csv")):
             allele_to_sequence = pandas.read_csv(
                 join(models_dir, "allele_sequences.csv"),
-                index_col=0).iloc[:,0].to_dict()
+                index_col=0).iloc[:, 0].to_dict()
 
         allele_to_percent_rank_transform = {}
         percent_ranks_path = join(models_dir, "percent_ranks.csv")
@@ -479,15 +480,15 @@ class Class1AffinityPredictor(object):
 
         logging.info(
             "Loaded %d class1 pan allele predictors, %d allele sequences, "
-            "%d percent rank distributions, and %d allele specific models: %s" % (
-                len(class1_pan_allele_models),
-                len(allele_to_sequence) if allele_to_sequence else 0,
-                len(allele_to_percent_rank_transform),
-                sum(len(v) for v in allele_to_allele_specific_models.values()),
-                ", ".join(
-                    "%s (%d)" % (allele, len(v))
-                    for (allele, v)
-                    in sorted(allele_to_allele_specific_models.items()))))
+            "%d percent rank distributions, and %d allele specific models: %s",
+            len(class1_pan_allele_models),
+            len(allele_to_sequence) if allele_to_sequence else 0,
+            len(allele_to_percent_rank_transform),
+            sum(len(v) for v in allele_to_allele_specific_models.values()),
+            ", ".join(
+                "%s (%d)" % (allele, len(v))
+                for (allele, v)
+                in sorted(allele_to_allele_specific_models.items())))
 
         result = Class1AffinityPredictor(
             allele_to_allele_specific_models=allele_to_allele_specific_models,
@@ -500,7 +501,7 @@ class Class1AffinityPredictor(object):
             logging.info("Optimizing models")
             optimized = result.optimize()
             logging.info(
-                "Optimization " + ("succeeded" if optimized else "failed"))
+                "Optimization %s", ("succeeded" if optimized else "failed"))
         return result
 
     def optimize(self):
@@ -527,7 +528,7 @@ class Class1AffinityPredictor(object):
                         merge_method="concatenate")
                 ]
             except NotImplementedError as e:
-                logging.warning("Optimization failed: %s" % str(e))
+                logging.warning("Optimization failed: %s", str(e))
                 return False
             self._manifest_df = None
             self.clear_cache()
@@ -584,8 +585,8 @@ class Class1AffinityPredictor(object):
         AlleleEncoding
         """
         if (self._master_allele_encoding is None or
-                    self._master_allele_encoding.allele_to_sequence !=
-                    self.allele_to_sequence):
+                self._master_allele_encoding.allele_to_sequence !=
+                self.allele_to_sequence):
             self._master_allele_encoding = AlleleEncoding(
                 allele_to_sequence=self.allele_to_sequence)
         return self._master_allele_encoding
@@ -793,7 +794,7 @@ class Class1AffinityPredictor(object):
         encodable_peptides = EncodableSequences.create(peptides)
         models = []
         for i in range(n_models):
-            logging.info("Training model %d / %d" % (i + 1, n_models))
+            logging.info("Training model %d / %d", i + 1, n_models)
             model = Class1NeuralNetwork(**architecture_hyperparameters)
             model.fit(
                 encodable_peptides,
@@ -879,10 +880,8 @@ class Class1AffinityPredictor(object):
                 msg = "Allele %s has no percentile rank information" % allele
                 if throw:
                     raise ValueError(msg)
-                else:
-                    warnings.warn(msg)
-                    # Return NaNs
-                    return numpy.ones(len(affinities)) * numpy.nan
+                warnings.warn(msg)
+                return numpy.ones(len(affinities)) * numpy.nan  # Return NaNs
 
         if alleles is None:
             raise ValueError("Specify allele or alleles")
@@ -1294,10 +1293,10 @@ class Class1AffinityPredictor(object):
 
         Returns
         ----------
-        None if motif_summary is False
+        dict of string -> pandas.DataFrame
 
-        Otherwise: dict of string -> pandas.DataFrame where keys are
-        "frequency_matrices" and "length_distributions".
+        If motif_summary is True, this will have keys "frequency_matrices" and
+        "length_distributions". Otherwise it will be empty.
 
         """
         if bins is None:
@@ -1323,7 +1322,7 @@ class Class1AffinityPredictor(object):
         else:
             frequency_matrices = None
             length_distributions = None
-        for (i, allele) in enumerate(alleles):
+        for allele in alleles:
             start = time.time()
             predictions = self.predict(
                 encoded_peptides, allele=allele, model_kwargs=model_kwargs)
@@ -1400,6 +1399,7 @@ class Class1AffinityPredictor(object):
                 'frequency_matrices': frequency_matrices,
                 'length_distributions': length_distributions,
             }
+        return {}
 
     def model_select(
             self,
@@ -1490,4 +1490,3 @@ class Class1AffinityPredictor(object):
                 "model_selection": df,
             })
         return new_predictor
-
diff --git a/mhcflurry/cluster_parallelism.py b/mhcflurry/cluster_parallelism.py
index 976f53a213b20c261bd4d9cf288430652694a0d5..9ec07de96283da2a351053297fee719f7cb20499 100644
--- a/mhcflurry/cluster_parallelism.py
+++ b/mhcflurry/cluster_parallelism.py
@@ -349,4 +349,3 @@ def worker_entry_point(argv=sys.argv[1:]):
         if args.complete_dir:
             os.mkdir(args.complete_dir)
             print("Created: ", args.complete_dir)
-
diff --git a/mhcflurry/common.py b/mhcflurry/common.py
index 7c8e1628682f1d716d2b3dc866c85eb2b897e176..8885637b2e963c98b70dcadf8762cd43cbc2ff4b 100644
--- a/mhcflurry/common.py
+++ b/mhcflurry/common.py
@@ -173,4 +173,4 @@ def positional_frequency_matrix(peptides):
         counts[i + 1] = pandas.Series([p[i] for p in peptides]).value_counts()
     result = (counts / len(peptides)).fillna(0.0).T
     result.index.name = 'position'
-    return result
\ No newline at end of file
+    return result
diff --git a/mhcflurry/custom_loss.py b/mhcflurry/custom_loss.py
index 47d51cb49aa5ffccbcb4dc714d97ba07e1289e91..8c523c0a61ad0576d54c1cd77a15dfdf9ba987c8 100644
--- a/mhcflurry/custom_loss.py
+++ b/mhcflurry/custom_loss.py
@@ -251,4 +251,4 @@ def check_shape(name, arr, expected_shape):
 
 # Register custom losses.
 for cls in [MSEWithInequalities, MSEWithInequalitiesAndMultipleOutputs]:
-    CUSTOM_LOSSES[cls.name] = cls()
\ No newline at end of file
+    CUSTOM_LOSSES[cls.name] = cls()
diff --git a/mhcflurry/downloads.py b/mhcflurry/downloads.py
index ec776f9e093c7309e5f06a54d0b35a52489d0d96..7f29ea521cb558cbf47523b1302b2875041509a5 100644
--- a/mhcflurry/downloads.py
+++ b/mhcflurry/downloads.py
@@ -9,9 +9,9 @@ from __future__ import (
 )
 import logging
 import yaml
-from os.path import join, exists, relpath
-from pipes import quote
+from os.path import join, exists
 from os import environ
+from pipes import quote
 from collections import OrderedDict
 from appdirs import user_data_dir
 from pkg_resources import resource_string
@@ -81,8 +81,7 @@ def get_default_class1_models_dir(test_exists=True):
         if test_exists and not exists(result):
             raise IOError("No such directory: %s" % result)
         return result
-    else:
-        return get_path("models_class1", "models", test_exists=test_exists)
+    return get_path("models_class1", "models", test_exists=test_exists)
 
 
 def get_current_release_downloads():
@@ -160,13 +159,13 @@ def configure():
             metadata["releases"][_CURRENT_RELEASE]["compatibility-version"])
         current_compatability = metadata["current-compatibility-version"]
         if current_release_compatability != current_compatability:
-            logging.warn(
+            logging.warning(
                 "The specified downloads are not compatible with this version "
                 "of the MHCflurry codebase. Downloads: release %s, "
-                "compatability version: %d. Code compatability version: %d" % (
-                    _CURRENT_RELEASE,
-                    current_release_compatability,
-                    current_compatability))
+                "compatability version: %d. Code compatability version: %d",
+                _CURRENT_RELEASE,
+                current_release_compatability,
+                current_compatability)
 
         data_dir = environ.get("MHCFLURRY_DATA_DIR")
         if not data_dir:
@@ -176,6 +175,7 @@ def configure():
             data_dir = user_data_dir("mhcflurry", version="4")
         _DOWNLOADS_DIR = join(data_dir, _CURRENT_RELEASE)
 
-    logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s" % _DOWNLOADS_DIR)
+    logging.debug("Configured MHCFLURRY_DOWNLOADS_DIR: %s", _DOWNLOADS_DIR)
+
 
 configure()
diff --git a/mhcflurry/downloads_command.py b/mhcflurry/downloads_command.py
index 8bf1d213400ddc3c2a7e5d9b6e81ae689189f54e..249269aea94f3a8b3bdcec5f7fce26f41c4e804d 100644
--- a/mhcflurry/downloads_command.py
+++ b/mhcflurry/downloads_command.py
@@ -168,8 +168,7 @@ def fetch_subcommand(args):
                 "\nThe requested download '%s' has already been downloaded. "
                 "To re-download this data, first run: \n\t%s\nin a shell "
                 "and then re-run this command.\n" +
-                "*" * 40)
-                % (name, 'rm -rf ' + quote(get_path(name))))
+                "*" * 40) % (name, 'rm -rf ' + quote(get_path(name))))
         if not info['downloaded'] and (name in args.download_name or default):
             items_to_fetch.add(name)
 
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index f6322835c92af973d673336c4a969e74f3182597..19696e23b9f901811e0fb01b9f16f58a7c0ac613 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -1,3 +1,6 @@
+"""
+Class for encoding variable-length peptides to fixed-size numerical matrices
+"""
 from __future__ import (
     print_function,
     division,
@@ -26,9 +29,12 @@ class EncodingError(ValueError):
 
 class EncodableSequences(object):
     """
-    Sequences of amino acids.
+    Class for encoding variable-length peptides to fixed-size numerical matrices
     
     This class caches various encodings of a list of sequences.
+
+    In practice this is used only for peptides. To encode MHC allele sequences,
+    see AlleleEncoding.
     """
     unknown_character = "X"
 
@@ -299,8 +305,10 @@ class EncodableSequences(object):
             min_length = 5
 
             # Result array is int32, filled with X (null amino acid) value.
-            result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-                shape=(len(sequences), max_length * 2), dtype="int32")
+            result = numpy.full(
+                fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+                shape=(len(sequences), max_length * 2),
+                dtype="int32")
 
             df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
 
@@ -319,9 +327,9 @@ class EncodableSequences(object):
                 # Array of shape (num peptides, length) giving fixed-length
                 # amino acid encoding each peptide of the current length.
                 fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-                    lambda s: numpy.array(
-                        [amino_acid.AMINO_ACID_INDEX[char] for char in
-                            s])).values)
+                    lambda s: numpy.array([
+                        amino_acid.AMINO_ACID_INDEX[char] for char in s
+                    ])).values)
 
                 # Set left edge
                 result[sub_df.index, :length] = fixed_length_sequences
@@ -334,8 +342,10 @@ class EncodableSequences(object):
             min_length = 5
 
             # Result array is int32, filled with X (null amino acid) value.
-            result = numpy.full(fill_value=amino_acid.AMINO_ACID_INDEX['X'],
-                shape=(len(sequences), max_length * 3), dtype="int32")
+            result = numpy.full(
+                fill_value=amino_acid.AMINO_ACID_INDEX['X'],
+                shape=(len(sequences), max_length * 3),
+                dtype="int32")
 
             df = pandas.DataFrame({"peptide": sequences}, dtype=numpy.object_)
 
@@ -354,9 +364,9 @@ class EncodableSequences(object):
                 # Array of shape (num peptides, length) giving fixed-length
                 # amino acid encoding each peptide of the current length.
                 fixed_length_sequences = numpy.stack(sub_df.peptide.map(
-                    lambda s: numpy.array(
-                        [amino_acid.AMINO_ACID_INDEX[char] for char in
-                            s])).values)
+                    lambda s: numpy.array([
+                        amino_acid.AMINO_ACID_INDEX[char] for char in s
+                    ])).values)
 
                 # Set left edge
                 result[sub_df.index, :length] = fixed_length_sequences
diff --git a/mhcflurry/ensemble_centrality.py b/mhcflurry/ensemble_centrality.py
index e370a39d66f31d8e343605a22c38e62bd12160b0..07251bf028e5414abf31afc53db2b2a2abb96e53 100644
--- a/mhcflurry/ensemble_centrality.py
+++ b/mhcflurry/ensemble_centrality.py
@@ -37,4 +37,4 @@ CENTRALITY_MEASURES = {
     "mean": partial(numpy.nanmean, axis=1),
     "median": partial(numpy.nanmedian, axis=1),
     "robust_mean": robust_mean,
-}
\ No newline at end of file
+}
diff --git a/mhcflurry/hyperparameters.py b/mhcflurry/hyperparameters.py
index cc5950d5c175a35c08eb9cfea0d44d64e3fdeb65..1241fa4650378f17247d2c0bea529ba4ac0a0261 100644
--- a/mhcflurry/hyperparameters.py
+++ b/mhcflurry/hyperparameters.py
@@ -1,3 +1,6 @@
+"""
+Hyperparameter (neural network options) management
+"""
 from __future__ import (
     print_function,
     division,
@@ -70,8 +73,7 @@ class HyperparameterDefaults(object):
         if invalid_keys:
             raise ValueError(
                 "No such model parameters: %s. Valid parameters are: %s"
-                % (" ".join(invalid_keys),
-                    " ".join(self.defaults)))
+                % (" ".join(invalid_keys), " ".join(self.defaults)))
 
     def models_grid(self, **kwargs):
         '''
diff --git a/mhcflurry/percent_rank_transform.py b/mhcflurry/percent_rank_transform.py
index a9098bc231675ddbe7444066a506f85ae3107660..a4597686941b97162ac3bcf6f833b3856f354e44 100644
--- a/mhcflurry/percent_rank_transform.py
+++ b/mhcflurry/percent_rank_transform.py
@@ -1,3 +1,6 @@
+"""
+Class for transforming arbitrary values into percent ranks given a distribution.
+"""
 import numpy
 import pandas
 
@@ -77,8 +80,3 @@ class PercentRankTransform(object):
         result.cdf = series.values
         result.bin_edges = series.index.values[1:-1]
         return result
-
-
-
-
-
diff --git a/mhcflurry/predict_command.py b/mhcflurry/predict_command.py
index dea1ddf4c15cc7ec8c1ea605985c9cbb351631d4..17a687f55eff01b46573c84341088ec9d3ecea88 100644
--- a/mhcflurry/predict_command.py
+++ b/mhcflurry/predict_command.py
@@ -219,7 +219,7 @@ def run(argv=sys.argv[1:]):
         })
         logging.info(
             "Predicting for %d alleles and %d peptides = %d predictions" % (
-            len(args.alleles), len(args.peptides), len(df)))
+                len(args.alleles), len(args.peptides), len(df)))
 
     predictions = predictor.predict_to_dataframe(
         peptides=df[args.peptide_column].values,
diff --git a/mhcflurry/scoring.py b/mhcflurry/scoring.py
index f6a256baa18e1bbe01968ce270466791588a34f1..d0d41d4e01e215c8c1e603fd5d1ec09dc08cf673 100644
--- a/mhcflurry/scoring.py
+++ b/mhcflurry/scoring.py
@@ -1,3 +1,6 @@
+"""
+Measures of prediction accuracy
+"""
 from __future__ import (
     print_function,
     division,
diff --git a/mhcflurry/select_pan_allele_models_command.py b/mhcflurry/select_pan_allele_models_command.py
index 0032f8fc5b9553e234473c44ca764420d30210e9..6e6f45fa8c29fd5ead83567ce251e9cb88141e08 100644
--- a/mhcflurry/select_pan_allele_models_command.py
+++ b/mhcflurry/select_pan_allele_models_command.py
@@ -24,7 +24,7 @@ tqdm.monitor_interval = 0  # see https://github.com/tqdm/tqdm/issues/481
 from .class1_affinity_predictor import Class1AffinityPredictor
 from .encodable_sequences import EncodableSequences
 from .allele_encoding import AlleleEncoding
-from .common import configure_logging, random_peptides
+from .common import configure_logging
 from .local_parallelism import (
     worker_pool_with_gpu_assignments_from_args,
     add_local_parallelism_args)
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index fe295b67df5c96080f3834c4cf95cfcf9a9d60fe..ec1b8c8af7adac1e10ef6d3305780b86ea98f41a 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -10,7 +10,6 @@ import traceback
 import random
 from functools import partial
 
-import numpy
 import pandas
 import yaml
 from sklearn.metrics.pairwise import cosine_similarity
@@ -337,7 +336,7 @@ def alleles_by_similarity(allele):
             allele_similarity.columns.to_series().sample(frac=1.0))
     return (
         allele_similarity[allele] + (
-        allele_similarity.index == allele)  # force that we return specified allele first
+            allele_similarity.index == allele)  # force specified allele first
     ).sort_values(ascending=False).index.tolist()
 
 
diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index d925f991af0cc4d66cf4bbf081eaa6d0d698ec06..a676bb38f2e64b7c83ae1f8b3bfdfd4ab90bb1b0 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -270,7 +270,8 @@ def run(argv=sys.argv[1:]):
             return main(args)
         except Exception as e:
             print(e)
-            import ipdb ; ipdb.set_trace()
+            import ipdb
+            ipdb.set_trace()
             raise
     else:
         return main(args)
@@ -697,4 +698,3 @@ def train_model(
 
 if __name__ == '__main__':
     run()
-