diff --git a/mhcflurry/class1_presentation_predictor.py b/mhcflurry/class1_presentation_predictor.py
index 6bbeb08de509319ad81b4d0f29cf124ecbfdf3ff..124ec6218243cd137719072aae9490627b4c8f8b 100644
--- a/mhcflurry/class1_presentation_predictor.py
+++ b/mhcflurry/class1_presentation_predictor.py
@@ -113,6 +113,7 @@ class Class1PresentationPredictor(object):
         df = pandas.DataFrame({
             "peptide": numpy.array(peptides, copy=False),
         })
+        df["peptide_num"] = df.index
         if experiment_names is None:
             peptides = EncodableSequences.create(peptides)
             all_alleles = set()
@@ -141,9 +142,13 @@ class Class1PresentationPredictor(object):
                 new_df["affinity"] = predictions_df[
                     experiment_alleles
                 ].min(1).values
-                new_df["best_allele"] = predictions_df[
-                    experiment_alleles
-                ].idxmin(1).values
+                if len(df) == 0:
+                    new_df["best_allele"] = []
+                else:
+                    new_df["best_allele"] = predictions_df[
+                        experiment_alleles
+                    ].idxmin(1).values
+                dfs.append(new_df)
 
             result_df = pandas.concat(dfs, ignore_index=True)
         else:
@@ -177,8 +182,8 @@ class Class1PresentationPredictor(object):
         if include_affinity_percentile:
             result_df["affinity_percentile"] = (
                 self.affinity_predictor.percentile_ranks(
-                    df.affinity.values,
-                    alleles=df.best_alleles.values,
+                    result_df.affinity.values,
+                    alleles=result_df.best_allele.values,
                     throw=False))
 
         return result_df
@@ -215,6 +220,9 @@ class Class1PresentationPredictor(object):
                 raise ValueError("No processing predictor with flanks")
             predictor = self.processing_predictor_with_flanks
 
+        if len(peptides) == 0:
+            return numpy.array([], dtype=float)
+
         num_chunks = int(numpy.ceil(float(len(peptides)) / PREDICT_CHUNK_SIZE))
         peptide_chunks = numpy.array_split(peptides, num_chunks)
         n_flank_chunks = numpy.array_split(n_flanks, num_chunks)
@@ -384,6 +392,13 @@ class Class1PresentationPredictor(object):
         Presentation scores for each peptide. Scores range from 0 to 1, with
         higher values indicating more favorable presentation likelihood.
         """
+        if isinstance(alleles, dict):
+            if experiment_names is None:
+                raise ValueError(
+                    "experiment_names must be supplied when alleles is a dict. "
+                    "Alternatively, call predict_to_dataframe to predict over "
+                    "all experiments")
+
         return self.predict_to_dataframe(
             peptides=peptides,
             alleles=alleles,
@@ -416,15 +431,17 @@ class Class1PresentationPredictor(object):
         ----------
         peptides : list of string
             Peptide sequences
-        alleles : list of string or string -> string dict
+        alleles : list of string or string -> list of string dict
             If you are predicting for a single sample, pass a list of strings
             (up to 6) indicating the genotype. If you are predicting across
             multiple samples, pass a dict where the keys are (arbitrary)
             experiment names and the values are the alleles to predict for that
             sample.
         experiment_names : list of string [same length as peptides]
-            If you are passing a dict for 'alleles', use this argument to
-            specify which peptides go with which experiments.
+            If you are passing a dict for 'alleles', you can use this argument to
+            specify which peptides go with which experiments. If it is None,
+            then predictions will be performed for each peptide across all
+            experiments.
         n_flanks : list of string [same length as peptides]
             Upstream sequences before the peptide. Sequences of any length can
             be given and a suffix of the size supported by the model will be
@@ -453,14 +470,12 @@ class Class1PresentationPredictor(object):
         if isinstance(alleles, string_types):
             raise TypeError("alleles must be a list or dict")
 
-        if isinstance(alleles, dict):
-            if experiment_names is None:
-                raise ValueError(
-                    "experiment_names must be supplied when alleles is a dict")
-        else:
+        if not isinstance(alleles, dict):
+            # Make alleles into a dict.
             if experiment_names is not None:
                 raise ValueError(
                     "alleles must be a dict when experiment_names is specified")
+
             alleles = numpy.array(alleles, copy=False)
             if len(alleles) > MAX_ALLELES_PER_SAMPLE:
                 raise ValueError(
@@ -472,7 +487,6 @@ class Class1PresentationPredictor(object):
                     "is expected to be the same length as peptides."
                     % MAX_ALLELES_PER_SAMPLE)
 
-            experiment_names = ["experiment1"] * len(peptides)
             alleles = {
                 "experiment1": alleles,
             }
@@ -488,22 +502,27 @@ class Class1PresentationPredictor(object):
 
         df = self.predict_affinity(
             peptides=peptides,
-            experiment_names=experiment_names,
             alleles=alleles,
+            experiment_names=experiment_names,  # might be None
             include_affinity_percentile=include_affinity_percentile,
             verbose=verbose,
             throw=throw)
+
         df["affinity_score"] = from_ic50(df.affinity)
-        df["processing_score"] = processing_scores
+        df["processing_score"] = df.peptide_num.map(
+            pandas.Series(processing_scores))
         if c_flanks is not None:
-            df.insert(1, "c_flank", c_flanks)
+            df.insert(1, "c_flank", df.peptide_num.map(pandas.Series(c_flanks)))
         if n_flanks is not None:
-            df.insert(1, "n_flank", n_flanks)
+            df.insert(1, "n_flank", df.peptide_num.map(pandas.Series(n_flanks)))
 
         model_name = 'with_flanks' if n_flanks is not None else "without_flanks"
         model = self.get_model(model_name)
-        df["presentation_score"] = model.predict_proba(
-            df[self.model_inputs].values)[:,1]
+        if len(df) > 0:
+            df["presentation_score"] = model.predict_proba(
+                df[self.model_inputs].values)[:,1]
+        else:
+            df["presentation_score"] = []
         del df["affinity_score"]
         return df
 
@@ -514,9 +533,9 @@ class Class1PresentationPredictor(object):
             result="best",
             comparison_quantity="presentation_score",
             filter_value=None,
-            peptide_lengths=[8, 9, 10, 11],
+            peptide_lengths=(8, 9, 10, 11),
             use_flanks=True,
-            include_affinity_percentile=False,
+            include_affinity_percentile=True,
             verbose=1,
             throw=True):
         """
@@ -527,22 +546,21 @@ class Class1PresentationPredictor(object):
         sequences : str, list of string, or string -> string dict
             Protein sequences. If a dict is given, the keys are arbitrary (
             e.g. protein names), and the values are the amino acid sequences.
-        alleles : str, list of string, list of list of string, or string -> string dict
+        alleles : list of string, list of list of string, or dict of string -> list of string
             MHC I alleles. Can be: (1) a string (a single allele), (2) a list of
             strings (a single genotype), (3) a list of list of strings
             (multiple genotypes, where the total number of genotypes must equal
-            the number of sequences), or (4) a dict (in which case the keys must
-            match the sequences dict keys).
+            the number of sequences), or (4) a dict giving multiple genotypes,
+            which will each be run over the sequences.
         result : string
-            One of:
-             - "best": return the strongest peptide for each sequence
-             - "all": return predictions for all peptides
-             - "filtered": return predictions where comparison_quantity is
-             stronger (i.e (<) for affinity, (>) for scores) than filter_value.
+            Specify 'best' to return the strongest peptide for each sequence,
+            'all' to return predictions for all peptides, or 'filtered' to
+            return predictions where the comparison_quantity is stronger
+            (i.e (<) for affinity, (>) for scores) than filter_value.
         comparison_quantity : string
-            One of "presentation_score", "processing_score", or "affinity".
-            Quantity to use to rank (if result is "best") or filter (if result
-            is "filtered") results.
+            One of "presentation_score", "processing_score", "affinity", or
+            "affinity_percentile". Prediction to use to rank (if result is
+            "best") or filter (if result is "filtered") results.
         filter_value : float
             Threshold value to use, only relevant when result is "filtered".
             If comparison_quantity is "affinity", then all results less than
@@ -589,42 +607,62 @@ class Class1PresentationPredictor(object):
                 ("sequence_%04d" % (i + 1), sequence)
                 for (i, sequence) in enumerate(sequences))
 
+        cross_product = True
         if isinstance(alleles, string_types):
+            # Case (1) - alleles is a string
             alleles = [alleles]
 
-        if not isinstance(alleles, dict):
-            if all([isinstance(item, string_types) for item in alleles]):
-                alleles = dict((name, alleles) for name in sequences.keys())
-            elif len(alleles) != len(sequences):
+        if isinstance(alleles, dict):
+            if any([isinstance(v, string_types) for v in alleles.values()]):
                 raise ValueError(
-                    "alleles must be (1) a string (a single allele), (2) a list of "
-                    "strings (a single genotype), (3) a list of list of strings ("
-                    "(multiple genotypes, where the total number of genotypes "
-                    "must equal the number of sequences), or (4) a dict (in which "
-                    "case the keys must match the sequences dict keys). Here "
-                    "it seemed like option (3) was being used, but the length "
-                    "of alleles (%d) did not match the length of sequences (%d)."
-                    % (len(alleles), len(sequences)))
+                    "The values in the alleles dict must be lists, not strings")
+        else:
+            if all(isinstance(a, string_types) for a in alleles):
+                # Case (2) - a simple list of alleles
+                alleles = {
+                    'genotype': alleles
+                }
             else:
-                alleles = dict(zip(sequences.keys(), alleles))
+                # Case (3) - a list of lists
+                alleles = collections.OrderedDict(
+                    ("genotype_%04d" % (i + 1), genotype)
+                    for (i, genotype) in enumerate(alleles))
+                cross_product = False
+
+                if len(alleles) != len(sequences):
+                    raise ValueError(
+                        "When passing a list of lists for the alleles argument "
+                        "the length of the list (%d) must match the length of "
+                        "the sequences being predicted (%d)" % (
+                            len(alleles), len(sequences)))
 
-        missing = [key for key in sequences if key not in alleles]
-        if missing:
-            raise ValueError(
-                "Sequence names missing from alleles dict: ", missing)
+        if not isinstance(alleles, dict):
+            raise ValueError("Invalid type for alleles: ", type(alleles))
+
+        experiment_names = None if cross_product else []
+        genotype_names = list(alleles)
+        position_in_sequence = []
+        for (i, (name, sequence)) in enumerate(sequences.items()):
+            genotype_name = None if cross_product else genotype_names[i]
 
-        for (name, sequence) in sequences.items():
             if not isinstance(sequence, string_types):
                 raise ValueError("Expected string, not %s (%s)" % (
                     sequence, type(sequence)))
-            for peptide_start in range(len(sequence) - min(peptide_lengths)):
+            for peptide_start in range(len(sequence) - min(peptide_lengths) + 1):
                 n_flank_start = max(0, peptide_start - n_flank_length)
                 for peptide_length in peptide_lengths:
+                    peptide = sequence[
+                        peptide_start: peptide_start + peptide_length
+                    ]
+                    if len(peptide) != peptide_length:
+                        continue
                     c_flank_end = (
                         peptide_start + peptide_length + c_flank_length)
                     sequence_names.append(name)
-                    peptides.append(
-                        sequence[peptide_start: peptide_start + peptide_length])
+                    position_in_sequence.append(peptide_start)
+                    if not cross_product:
+                        experiment_names.append(genotype_name)
+                    peptides.append(peptide)
                     if use_flanks:
                         n_flanks.append(
                             sequence[n_flank_start : peptide_start])
@@ -636,13 +674,20 @@ class Class1PresentationPredictor(object):
             alleles=alleles,
             n_flanks=n_flanks,
             c_flanks=c_flanks,
-            experiment_names=sequence_names,
+            experiment_names=experiment_names,
             include_affinity_percentile=include_affinity_percentile,
             verbose=verbose,
             throw=throw)
 
-        result_df = result_df.rename(
-            columns={"experiment_name": "sequence_name"})
+        result_df.insert(
+            0,
+            "sequence_name",
+            result_df.peptide_num.map(pandas.Series(sequence_names)))
+        result_df.insert(
+            1,
+            "position_in_sequence",
+            result_df.peptide_num.map(pandas.Series(position_in_sequence)))
+        del result_df["peptide_num"]
 
         comparison_is_score = comparison_quantity.endswith("score")
 
@@ -652,7 +697,8 @@ class Class1PresentationPredictor(object):
 
         if result == "best":
             result_df = result_df.drop_duplicates(
-                "sequence_name", keep="first").sort_values("sequence_name")
+                ["sequence_name", "experiment_name"], keep="first"
+            ).sort_values("sequence_name")
         elif result == "filtered":
             if comparison_is_score:
                 result_df = result_df.loc[
diff --git a/mhcflurry/fasta.py b/mhcflurry/fasta.py
new file mode 100644
index 0000000000000000000000000000000000000000..88b87092b04a0d10bca73a90cce6a88f2e07aeea
--- /dev/null
+++ b/mhcflurry/fasta.py
@@ -0,0 +1,120 @@
+"""
+Adapted from pyensembl, github.com/openvax/pyensembl
+Original implementation by Alex Rubinsteyn.
+
+The worse sin in bioinformatics is to write your own FASTA parser.
+We're doing this to avoid adding another dependency to MHCflurry, however.
+"""
+
+from __future__ import print_function, division, absolute_import
+
+from gzip import GzipFile
+import logging
+
+from six import binary_type, PY3
+
+import pandas
+
+
+def read_fasta_to_dataframe(filename):
+    reader = FastaParser()
+    rows = reader.iterate_over_file(filename)
+    return pandas.DataFrame(
+        rows,
+        columns=["sequence_id", "sequence"])
+
+class FastaParser(object):
+    """
+    FastaParser object consumes lines of a FASTA file incrementally.
+    """
+    def __init__(self):
+        self.current_id = None
+        self.current_lines = []
+
+    def iterate_over_file(self, fasta_path):
+        """
+        Generator that yields identifiers paired with sequences.
+        """
+        with self.open_file(fasta_path) as f:
+            for line in f:
+                line = line.rstrip()
+
+                if len(line) == 0:
+                    continue
+
+                # have to slice into a bytes object or else get a single integer
+                first_char = line[0:1]
+
+                if first_char == b">":
+                    previous_entry = self._current_entry()
+                    self.current_id = self._parse_header_id(line)
+
+                    if len(self.current_id) == 0:
+                        logging.warning(
+                            "Unable to parse ID from header line: %s", line)
+
+                    self.current_lines = []
+
+                    if previous_entry is not None:
+                        yield previous_entry
+
+                elif first_char == b";":
+                    # semicolon are comment characters
+                    continue
+                else:
+                    self.current_lines.append(line)
+
+        # the last sequence is still in the lines buffer after we're done with
+        # the file so make sure to yield it
+        id_and_seq = self._current_entry()
+        if id_and_seq is not None:
+            yield id_and_seq
+
+    def _current_entry(self):
+        # when we hit a new entry, if this isn't the first
+        # entry of the file then put the last one in the dictionary
+        if self.current_id:
+            if len(self.current_lines) == 0:
+                logging.warning("No sequence data for '%s'", self.current_id)
+            else:
+                sequence = b"".join(self.current_lines)
+                if PY3:
+                    # only decoding into an ASCII str for Python 3 since
+                    # the binary sequence type for Python 2 is already 'str'
+                    # and the unicode representation is inefficient
+                    # (using either 16 or 32 bits per character depends on build)
+                    sequence = sequence.decode("ascii")
+                return self.current_id, sequence
+
+    @staticmethod
+    def open_file(fasta_path):
+        """
+        Open either a text file or compressed gzip file as a stream of bytes.
+        """
+        if fasta_path.endswith("gz") or fasta_path.endswith("gzip"):
+            return GzipFile(fasta_path, 'rb')
+        else:
+            return open(fasta_path, 'rb')
+
+    @staticmethod
+    def _parse_header_id(line):
+        """
+        Pull the transcript or protein identifier from the header line
+        which starts with '>'
+        """
+        if type(line) is not binary_type:
+            raise TypeError("Expected header line to be of type %s but got %s" % (
+                binary_type, type(line)))
+
+        if len(line) <= 1:
+            raise ValueError("No identifier on FASTA line")
+
+        # split line at first space to get the unique identifier for
+        # this sequence
+        space_index = line.find(b" ")
+        if space_index >= 0:
+            identifier = line[1:space_index]
+        else:
+            identifier = line[1:]
+
+        return identifier.decode("ascii")
\ No newline at end of file
diff --git a/mhcflurry/predict_scan_command.py b/mhcflurry/predict_scan_command.py
new file mode 100644
index 0000000000000000000000000000000000000000..f535e6c43f5bdff485b46baaeabbf85b8e461132
--- /dev/null
+++ b/mhcflurry/predict_scan_command.py
@@ -0,0 +1,328 @@
+'''
+Scan protein sequences using the MHCflurry presentation predictor.
+
+By default, subsequences with affinity percentile ranks less than 2.0 are
+returned. You can also specify --results-all to return predictions for all
+subsequences, or --results-best to return the top subsequence for each sequence.
+
+Examples:
+
+Scan a set of sequences in a FASTA file for binders to any alleles in a MHC I
+genotype:
+
+    mhcflurry-predict-scan \
+        test/data/example.fasta \
+        --alleles HLA-A*02:01,HLA-A*03:01,HLA-B*57:01,HLA-B*45:01,HLA-C*02:01,HLA-C*07:02
+
+Instead of a FASTA, you can also pass a CSV that has "sequence_id" and "sequence"
+columns.
+
+You can also specify multiple MHC I genotypes to scan:
+
+    mhcflurry-predict-scan \
+        test/data/example.fasta \
+        --alleles \
+            HLA-A*02:01,HLA-A*03:01,HLA-B*57:01,HLA-B*45:01,HLA-C*02:01,HLA-C*07:02 \
+            HLA-A*01:01,HLA-A*02:06,HLA-B*68:01,HLA-B*07:02,HLA-C*01:01,HLA-C*03:01
+
+If `--out` is not specified, results are written to standard out.
+
+You can also run on sequences specified on the commandline:
+
+mhcflurry-predict-scan \
+    --sequences MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT \
+    --alleles HLA-A0201 H-2Kb
+'''
+from __future__ import (
+    print_function,
+    division,
+    absolute_import,
+)
+
+import sys
+import argparse
+import itertools
+import logging
+import os
+
+import pandas
+
+from .downloads import get_default_class1_presentation_models_dir
+from .class1_affinity_predictor import Class1AffinityPredictor
+from .class1_presentation_predictor import Class1PresentationPredictor
+from .fasta import read_fasta_to_dataframe
+from .version import __version__
+
+
+parser = argparse.ArgumentParser(
+    description=__doc__,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    add_help=False)
+
+
+helper_args = parser.add_argument_group(title="Help")
+helper_args.add_argument(
+    "-h", "--help",
+    action="help",
+    help="Show this help message and exit"
+)
+helper_args.add_argument(
+    "--list-supported-alleles",
+    action="store_true",
+    default=False,
+    help="Prints the list of supported alleles and exits"
+)
+helper_args.add_argument(
+    "--list-supported-peptide-lengths",
+    action="store_true",
+    default=False,
+    help="Prints the list of supported peptide lengths and exits"
+)
+helper_args.add_argument(
+    "--version",
+    action="version",
+    version="mhcflurry %s" % __version__,
+)
+
+input_args = parser.add_argument_group(title="Input options")
+input_args.add_argument(
+    "input",
+    metavar="INPUT.csv",
+    nargs="?",
+    help="Input CSV or FASTA")
+input_args.add_argument(
+    "--input-format",
+    choices=("guess", "csv", "fasta"),
+    default="guess",
+    help="Format of input file. By default, it is guessed from the file "
+         "extension.")
+input_args.add_argument(
+    "--alleles",
+    metavar="ALLELE",
+    nargs="+",
+    help="Alleles to predict")
+input_args.add_argument(
+    "--sequences",
+    metavar="SEQ",
+    nargs="+",
+    help="Sequences to predict (exclusive with passing an input file)")
+input_args.add_argument(
+    "--sequence-id-column",
+    metavar="NAME",
+    default="sequence_id",
+    help="Input CSV column name for sequence IDs. Default: '%(default)s'")
+input_args.add_argument(
+    "--sequence-column",
+    metavar="NAME",
+    default="sequence",
+    help="Input CSV column name for sequences. Default: '%(default)s'")
+input_args.add_argument(
+    "--no-throw",
+    action="store_true",
+    default=False,
+    help="Return NaNs for unsupported alleles or peptides instead of raising")
+
+results_args = parser.add_argument_group(title="Result options")
+results_args.add_argument(
+    "--peptide-lengths",
+    type=int,
+    nargs="+",
+    default=[8, 9, 10, 11],
+    help="Peptide lengths to consider. Default: %(default)s.")
+comparison_quantities = [
+    "presentation_score",
+    "processing_score",
+    "affinity",
+    "affinity_percentile",
+]
+results_args.add_argument(
+    "--results-all",
+    action="store_true",
+    default=False,
+    help="")
+results_args.add_argument(
+    "--results-best",
+    choices=comparison_quantities,
+    help="Take the top result for each sequence according to the specified "
+    "predicted quantity")
+results_args.add_argument(
+    "--results-filtered",
+    choices=comparison_quantities,
+    help="Filter results by the specified quantity.")
+results_args.add_argument(
+    "--threshold-presentation-score",
+    type=float,
+    default=0.7,
+    help="Threshold if filtering by presentation score. Default: %(default)s")
+results_args.add_argument(
+    "--threshold-processing-score",
+    type=float,
+    default=0.5,
+    help="Threshold if filtering by processing score. Default: %(default)s")
+results_args.add_argument(
+    "--threshold-affinity",
+    type=float,
+    default=500,
+    help="Threshold if filtering by affinity. Default: %(default)s")
+results_args.add_argument(
+    "--threshold-affinity-percentile",
+    type=float,
+    default=2.0,
+    help="Threshold if filtering by affinity percentile. Default: %(default)s")
+
+
+output_args = parser.add_argument_group(title="Output options")
+output_args.add_argument(
+    "--out",
+    metavar="OUTPUT.csv",
+    help="Output CSV")
+output_args.add_argument(
+    "--output-delimiter",
+    metavar="CHAR",
+    default=",",
+    help="Delimiter character for results. Default: '%(default)s'")
+output_args.add_argument(
+    "--no-affinity-percentile",
+    default=False,
+    action="store_true",
+    help="Do not include affinity percentile rank")
+
+model_args = parser.add_argument_group(title="Model options")
+model_args.add_argument(
+    "--models",
+    metavar="DIR",
+    default=None,
+    help="Directory containing presentation models."
+    "Default: %s" % get_default_class1_presentation_models_dir(
+        test_exists=False))
+model_args.add_argument(
+    "--no-flanking",
+    action="store_true",
+    default=False,
+    help="Do not use flanking sequence information in predictions")
+
+
+def run(argv=sys.argv[1:]):
+    logging.getLogger('tensorflow').disabled = True
+
+    if not argv:
+        parser.print_help()
+        parser.exit(1)
+
+    args = parser.parse_args(argv)
+
+    # It's hard to pass a tab in a shell, so we correct a common error:
+    if args.output_delimiter == "\\t":
+        args.output_delimiter = "\t"
+
+    result_args = {
+        "all": args.results_all,
+        "best": args.results_best,
+        "filtered": args.results_filtered,
+    }
+    if all([not bool(arg) for arg in result_args.values()]):
+        result_args["filtered"] = "affinity_percentile"
+
+    if sum([bool(arg) for arg in result_args.values()]) > 1:
+        parser.error(
+            "Specify at most one of --results-all, --results-best, "
+            "--results-filtered")
+
+    (result,) = [key for (key, value) in result_args.items() if value]
+    result_comparison_quantity = result_args[result]
+    result_filter_value = None if result != "filtered" else {
+        "presentation_score": args.threshold_presentation_score,
+        "processing_score": args.threshold_processing_score,
+        "affinity": args.threshold_affinity,
+        "affinity_percentile": args.threshold_affinity_percentile,
+    }[result_comparison_quantity]
+
+    models_dir = args.models
+    if models_dir is None:
+        # The reason we set the default here instead of in the argument parser
+        # is that we want to test_exists at this point, so the user gets a
+        # message instructing them to download the models if needed.
+        models_dir = get_default_class1_presentation_models_dir(test_exists=True)
+
+    predictor = Class1PresentationPredictor.load(models_dir)
+
+    if args.list_supported_alleles:
+        print("\n".join(predictor.supported_alleles))
+        return
+
+    if args.list_supported_peptide_lengths:
+        min_len, max_len = predictor.supported_peptide_lengths
+        print("\n".join([str(l) for l in range(min_len, max_len+1)]))
+        return
+
+    if args.input:
+        if args.sequences:
+            parser.error(
+                "If an input file is specified, do not specify --sequences")
+
+        input_format = args.input_format
+        if input_format == "guess":
+            extension = args.input.lower().split(".")[-1]
+            if extension in ["gz", "bzip2"]:
+                extension = args.input.lower().split(".")[-2]
+
+            if extension == "csv":
+                input_format = "csv"
+            elif extension in ["fasta", "fa"]:
+                input_format = "fasta"
+            else:
+                parser.error(
+                    "Couldn't guess input format from file extension: %s\n"
+                    "Pass the --input-format argument to specify if it is a "
+                    "CSV or fasta file" % args.input)
+            print("Guessed input file format:", input_format)
+
+        if input_format == "csv":
+            df = pandas.read_csv(args.input)
+            print("Read input CSV with %d rows, columns are: %s" % (
+                len(df), ", ".join(df.columns)))
+            for col in [args.sequence_column,]:
+                if col not in df.columns:
+                    raise ValueError(
+                        "No such column '%s' in CSV. Columns are: %s" % (
+                            col, ", ".join(["'%s'" % c for c in df.columns])))
+
+        elif input_format == "fasta":
+            df = read_fasta_to_dataframe(args.input)
+            print("Read input fasta with %d sequences" % len(df))
+            print(df)
+        else:
+            raise ValueError("Unsupported input format", input_format)
+    else:
+        if not args.sequences:
+            parser.error(
+                "Specify either an input file or the --sequences argument")
+
+        df = pandas.DataFrame({
+            args.sequence_column: args.sequences,
+        })
+
+    if args.sequence_id_column not in df:
+        df[args.sequence_id_column] = "sequence_" + df.index.astype(str)
+
+    df = df.set_index(args.sequence_id_column)
+
+    genotypes = pandas.Series(args.alleles).str.split(r"[,\s]+")
+    genotypes.index = genotypes.index.map(lambda i: "genotype_%02d" % i)
+
+    result_df = predictor.predict_sequences(
+        sequences=df[args.sequence_column].to_dict(),
+        alleles=genotypes.to_dict(),
+        result=result,
+        comparison_quantity=result_comparison_quantity,
+        filter_value=result_filter_value,
+        peptide_lengths=args.peptide_lengths,
+        use_flanks=not args.no_flanking,
+        include_affinity_percentile=not args.no_affinity_percentile,
+        throw=not args.no_throw)
+
+    if args.out:
+        result_df.to_csv(args.out, index=False, sep=args.output_delimiter)
+        print("Wrote: %s" % args.out)
+    else:
+        result_df.to_csv(sys.stdout, index=False, sep=args.output_delimiter)
diff --git a/setup.py b/setup.py
index c2dbed429600e8ec0767a9181c6a983c67b573b7..aaaf760380d6bca5521825b79f86189d6ce9c935 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@ if __name__ == '__main__':
             'console_scripts': [
                 'mhcflurry-downloads = mhcflurry.downloads_command:run',
                 'mhcflurry-predict = mhcflurry.predict_command:run',
+                'mhcflurry-predict-scan = mhcflurry.predict_scan_command:run',
                 'mhcflurry-class1-train-allele-specific-models = '
                     'mhcflurry.train_allele_specific_models_command:run',
                 'mhcflurry-class1-train-pan-allele-models = '
diff --git a/test/data/example.fasta b/test/data/example.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..56b0ab3274e7fe359e16bff3c06b2eb2b7e54d09
--- /dev/null
+++ b/test/data/example.fasta
@@ -0,0 +1,26 @@
+>QHN73810.1 surface glycoprotein [Severe acute respiratory syndrome coronavirus 2]
+MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHV
+SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPF
+LGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPI
+NLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYN
+ENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASV
+YAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIAD
+YNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYF
+PLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFL
+PFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLT
+PTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLG
+AENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGI
+AVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDC
+LGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIG
+VTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDI
+LSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLM
+SFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNT
+FVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVA
+KNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDD
+SEPVLKGVKLHYT
+>protein1
+MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQV
+EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHH
+>protein2
+VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGA
+ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC
diff --git a/test/test_class1_presentation_predictor.py b/test/test_class1_presentation_predictor.py
index 242f60875341995ec3c9e82e5382ec4a87ab47bd..b0ab1612664f948e7a7cab7d983daa164729f304 100644
--- a/test/test_class1_presentation_predictor.py
+++ b/test/test_class1_presentation_predictor.py
@@ -132,6 +132,87 @@ def test_basic():
         numpy.testing.assert_array_almost_equal(
             test_df["prediction2"], other_test_df["prediction2"], decimal=6)
 
+
+def test_downloaded_predictor_small():
+    global PRESENTATION_PREDICTOR
+
+    # Test sequence scanning
+    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+        sequences=[
+            "MESLVPGFN",
+            "QPYVFIKRS",
+            "AGGHSYGAD",
+        ],
+        alleles={
+            "HLA-A*02:01": ["HLA-A*02:01"],
+            "HLA-C*02:01": ["HLA-C*02:01"],
+        },
+        peptide_lengths=[9],
+        result="best")
+    print(scan_results)
+    assert_equal(len(scan_results), 6)
+
+    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+        sequences=[
+            "MESLVPGFN",
+            "QPYVFIKRS",
+            "AGGHSYGAD",
+        ],
+        alleles={
+            "HLA-A*02:01": ["HLA-A*02:01"],
+            "HLA-C*02:01": ["HLA-C*02:01"],
+        },
+        peptide_lengths=[8, 9],
+        result="best")
+    print(scan_results)
+    assert_equal(len(scan_results), 6)
+
+    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+        sequences=[
+            "MESLVPGFN",
+            "QPYVFIKRS",
+            "AGGHSYGAD",
+        ],
+        alleles={
+            "HLA-A*02:01": ["HLA-A*02:01"],
+            "HLA-C*02:01": ["HLA-C*02:01"],
+        },
+        peptide_lengths=[9],
+        result="all")
+    print(scan_results)
+    assert_equal(len(scan_results), 6)
+
+    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+        sequences=[
+            "MESLVPGFN",
+            "QPYVFIKRS",
+            "AGGHSYGAD",
+        ],
+        alleles={
+            "HLA-A*02:01": ["HLA-A*02:01"],
+            "HLA-C*02:01": ["HLA-C*02:01"],
+        },
+        peptide_lengths=[8, 9],
+        result="all")
+    print(scan_results)
+    assert_equal(len(scan_results), 18)
+
+    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+        sequences=[
+            "MESLVPGFN",
+            "QPYVFIKRS",
+            "AGGHSYGAD",
+        ],
+        alleles={
+            "HLA-A*02:01": ["HLA-A*02:01"],
+            "HLA-C*02:01": ["HLA-C*02:01"],
+        },
+        peptide_lengths=[10],
+        result="all")
+    print(scan_results)
+    assert_equal(len(scan_results), 0)
+
+
 def test_downloaded_predictor():
     global PRESENTATION_PREDICTOR
 
@@ -220,3 +301,32 @@ def test_downloaded_predictor():
 
     assert len(scan_results4) > 200, len(scan_results4)
     assert_less(scan_results4.iloc[0].affinity, 100)
+
+    scan_results5 = PRESENTATION_PREDICTOR.predict_sequences(
+        result="all",
+        comparison_quantity="affinity",
+        sequences={
+            "seq1": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLE",
+            "seq2": "QPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKG",
+            "seq3": "AGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDG",
+        },
+        alleles={
+            "sample1": [
+                "HLA-A*02:01",
+                "HLA-A*03:01",
+                "HLA-B*57:01",
+                "HLA-B*44:02",
+                "HLA-C*02:01",
+                "HLA-C*07:01",
+            ],
+            "sample2": [
+                "HLA-A*01:01",
+                "HLA-A*02:06",
+                "HLA-B*07:02",
+                "HLA-B*44:02",
+                "HLA-C*03:01",
+                "HLA-C*07:02",
+            ],
+        })
+    print(scan_results5)
+    assert_equal(len(scan_results5), len(scan_results4) * 2)