diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index 9ad57b59e3b83e5f6f6305fa544615a301e84053..c1a908084f56c3d68d61ac181d2bc2d0ac642b35 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -1,4 +1,27 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import paths
+from . import data_helpers
+from . import feedforward
+from . import common
 from .mhc1_binding_predictor import Mhc1BindingPredictor
-import paths
-import data_helpers
-import feedforward
+
+__all__ = [  # names re-exported as the package's public API
+    "paths",
+    "data_helpers",
+    "feedforward",
+    "common",
+    "Mhc1BindingPredictor",
+]
diff --git a/mhcflurry/amino_acid.py b/mhcflurry/amino_acid.py
index ba24c6a02320f8a63633bbcc7c01a2aa2a8e6bf4..b606a7a335fdb94d76c514af4a05b62fd065bb8f 100644
--- a/mhcflurry/amino_acid.py
+++ b/mhcflurry/amino_acid.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 amino_acids = {
     "A": "Alanine",
     "R": "Arginine",
diff --git a/mhcflurry/class1_allele_specific_hyperparameters.py b/mhcflurry/class1_allele_specific_hyperparameters.py
index 4610a10824f3db7298b6b6bbd8e3990b37d71945..68368f8c402daebddd29c5bbc13bfc5c632a4cc0 100644
--- a/mhcflurry/class1_allele_specific_hyperparameters.py
+++ b/mhcflurry/class1_allele_specific_hyperparameters.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 N_PRETRAIN_EPOCHS = 10
 N_EPOCHS = 100
 ACTIVATION = "relu"
diff --git a/mhcflurry/common.py b/mhcflurry/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..17e4c4d9a4558500b7b490ae4ed0e394f0e6b6f5
--- /dev/null
+++ b/mhcflurry/common.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def parse_int_list(s):
+    """
+    Parse a comma separated string of integers into a list of ints.
+
+    Whitespace around each element is ignored, e.g. "8, 9,10" -> [8, 9, 10].
+    Raises ValueError if any element is not a valid integer literal.
+    """
+    # int() has to be applied to every element; handing it the whole
+    # generator expression raises TypeError.
+    return [int(part.strip()) for part in s.split(",")]
+
+
+def split_peptide_sequences(s):
+    """
+    Split a comma separated string into stripped, upper-cased sequences.
+    """
+    return [part.strip().upper() for part in s.split(",")]
diff --git a/mhcflurry/data_helpers.py b/mhcflurry/data_helpers.py
index b81cfe193548cf9b041c8c3272d36f06822cb572..d3143d6bac3c34d71a0173ce206a3687523dd9c5 100644
--- a/mhcflurry/data_helpers.py
+++ b/mhcflurry/data_helpers.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from collections import namedtuple
 import pandas as pd
 import numpy as np
diff --git a/mhcflurry/feedforward.py b/mhcflurry/feedforward.py
index 032179d34c0aea211cf00de30f71fabf94bdedbf..8dde278626559f0d62b3c46527033f4e7cf1416a 100644
--- a/mhcflurry/feedforward.py
+++ b/mhcflurry/feedforward.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import keras
 from keras.models import Sequential
 from keras.layers.core import Dense, Activation, Flatten, Dropout
diff --git a/mhcflurry/mhc1_binding_predictor.py b/mhcflurry/mhc1_binding_predictor.py
index 5594724dff52a93dca0024be585cddd395d97809..0d3814190a63c0c90263567f074443427064d872 100644
--- a/mhcflurry/mhc1_binding_predictor.py
+++ b/mhcflurry/mhc1_binding_predictor.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 """
 Allele specific MHC Class I binding affinity predictor
 """
diff --git a/mhcflurry/paths.py b/mhcflurry/paths.py
index 323ad63dcd0895394a3af97e269063e3401dfadf..2a587612491296be594d028f98c4fdb01801a2fd 100644
--- a/mhcflurry/paths.py
+++ b/mhcflurry/paths.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from os.path import join
 from appdirs import user_data_dir
 
diff --git a/scripts/build-iedb-class1-dataset.py b/scripts/build-iedb-class1-dataset.py
deleted file mode 100755
index 70a7fa738bf7cfc69a9b5efea27bbdd4d9fe51ac..0000000000000000000000000000000000000000
--- a/scripts/build-iedb-class1-dataset.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-Turn a raw CSV snapshot of the IEDB contents into a usable
-class I binding prediction dataset by grouping all unique pMHCs
-"""
-from collections import defaultdict
-from os.path import join
-import pickle
-
-import numpy as np
-import pandas as pd
-
-from mhcflurry.paths import CLASS1_DATA_DIRECTORY
-
-IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv"
-IEDB_SOURCE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_SOURCE_FILENAME)
-print(IEDB_SOURCE_PATH)
-OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
-OUTPUT_PATH = join(CLASS1_DATA_DIRECTORY, OUTPUT_FILENAME)
-
-if __name__ == "__main__":
-    df = pd.read_csv(
-        IEDB_SOURCE_PATH,
-        error_bad_lines=False,
-        encoding="latin-1",
-        header=[0, 1])
-    alleles = df["MHC"]["Allele Name"]
-    n = len(alleles)
-    print("# total: %d" % n)
-
-    mask = np.zeros(n, dtype=bool)
-    patterns = [
-        "HLA-A",
-        "HLA-B",
-        "HLA-C",
-        # "H-2-D",
-        # "H-2-K",
-        # "H-2-L",
-    ]
-    for pattern in patterns:
-        pattern_mask = alleles.str.startswith(pattern)
-        print("# %s: %d" % (pattern, pattern_mask.sum()))
-        mask |= pattern_mask
-    df = df[mask]
-    print("# entries matching allele masks: %d" % (len(df)))
-    assay_group = df["Assay"]["Assay Group"]
-    assay_method = df["Assay"]["Method/Technique"]
-    groups = df.groupby([assay_group, assay_method])
-    print("---")
-    print("Assays")
-    assay_dataframes = {}
-    # create a dataframe for every distinct kind of assay which is used
-    # by IEDB submitters to measure peptide-MHC affinity or stability
-    for (assay_group, assay_method), group_data in sorted(
-            groups, key=lambda x: len(x[1]), reverse=True):
-        print("%s (%s): %d" % (assay_group, assay_method, len(group_data)))
-        group_alleles = group_data["MHC"]["Allele Name"]
-        group_peptides = group_data["Epitope"]["Description"]
-        distinct_pmhc = group_data.groupby([group_alleles, group_peptides])
-        columns = defaultdict(list)
-        for (allele, peptide), pmhc_group in distinct_pmhc:
-            columns["mhc"].append(allele)
-            columns["peptide"].append(peptide)
-            # performing median in log space since in two datapoint case
-            # we don't want to take e.g. (10 + 1000) / 2.0 = 505
-            # but would prefer something like 10 ** ( (1 + 3) / 2.0) = 100
-            columns["value"].append(
-                np.exp(
-                    np.median(
-                        np.log(
-                            pmhc_group["Assay"]["Quantitative measurement"]))))
-            qualitative = pmhc_group["Assay"]["Qualitative Measure"]
-            columns["percent_positive"].append(
-                qualitative.str.startswith("Positive").mean())
-            columns["count"].append(
-                pmhc_group["Assay"]["Quantitative measurement"].count())
-        assay_dataframes[(assay_group, assay_method)] = pd.DataFrame(
-            columns,
-            columns=[
-                "mhc",
-                "peptide",
-                "value",
-                "percent_positive",
-                "count"])
-        print("# distinct pMHC entries: %d" % len(columns["mhc"]))
-    with open(OUTPUT_PATH, "w") as f:
-        pickle.dump(assay_dataframes, f, pickle.HIGHEST_PROTOCOL)
diff --git a/scripts/download-iedb.sh b/scripts/download-iedb.sh
index bc962debbfb7974ea0cecb0aeecfbb1cf06fa659..5bdd4ae9b5f387df0ef086141819bdf2c3af86cf 100755
--- a/scripts/download-iedb.sh
+++ b/scripts/download-iedb.sh
@@ -1,4 +1,19 @@
 #!/usr/bin/env bash
+
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 rm -f mhc_ligand_full*
 wget http://www.iedb.org/doc/mhc_ligand_full.zip
 unzip mhc_ligand_full.zip
diff --git a/scripts/download-peters-2013-dataset.sh b/scripts/download-peters-2013-dataset.sh
index 76d814aa85a8c8a9d7bcd506cc057b77c63a4a8d..780e8cf407ec248be7cf8642d8c08a40f2aee797 100755
--- a/scripts/download-peters-2013-dataset.sh
+++ b/scripts/download-peters-2013-dataset.sh
@@ -1,5 +1,19 @@
 #!/usr/bin/env bash
 
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Download dataset from Kim/Peters 2013 "Dataset size and composition" paper
 rm -f bdata.20130222.mhci.public*
 wget https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
diff --git a/scripts/mhcflurry.py b/scripts/mhcflurry.py
new file mode 100755
index 0000000000000000000000000000000000000000..b361640417902fd58612e679550f09bb6cd77c3c
--- /dev/null
+++ b/scripts/mhcflurry.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+
+# NOTE(review): this script is named mhcflurry.py, so when it is run directly
+# its own directory comes first on sys.path and "mhcflurry" may resolve to this
+# file instead of the package -- confirm, and consider renaming the script.
+from mhcflurry.common import parse_int_list, split_peptide_sequences
+
+# Command line options. --sequence and --peptide-lengths are turned into
+# lists by their `type` callables; --mhc is left as the raw string.
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--mhc",
+    default="HLA-A*02:01",
+    help="Comma separated list of MHC alleles")
+
+parser.add_argument(
+    "--sequence",
+    required=True,
+    type=split_peptide_sequences,
+    help="Comma separated list of protein sequences")
+
+parser.add_argument(
+    "--fasta-file",
+    help="FASTA file of protein sequences to chop up into peptides")
+
+parser.add_argument(
+    "--peptide-lengths",
+    default=[9],
+    type=parse_int_list,
+    help="Comma separated list of peptide length, e.g. 8,9,10,11")
+
+if __name__ == "__main__":
+    # Only parse the command line when executed as a script, not on import.
+    args = parser.parse_args()
diff --git a/setup.py b/setup.py
index d1da6015dffdf5c92114374f5cb32ef31bb616cb..d32dd22d147e0625ce9c54f1c0cb6256864a6da5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014. Mount Sinai School of Medicine
+# Copyright (c) 2015. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ if __name__ == '__main__':
         install_requires=[
             'numpy>=1.7',
             'pandas>=0.13.1',
-	    'appdirs',
+            'appdirs',
         ],
         long_description=readme,
         packages=['mhcflurry'],