diff --git a/mhcflurry/__init__.py b/mhcflurry/__init__.py
index f38e7ec771e7d93cbcb4b4a7c35b7e864892f579..ee705ecf4523af5e97d77a76a2602cd8fa5bd42b 100644
--- a/mhcflurry/__init__.py
+++ b/mhcflurry/__init__.py
@@ -14,3 +14,5 @@
 
 from .class1_binding_predictor import Class1BindingPredictor
 from .predict import predict
+from .ensemble import Ensemble
+from .package_metadata import __version__
diff --git a/mhcflurry/package_metadata.py b/mhcflurry/package_metadata.py
new file mode 100644
index 0000000000000000000000000000000000000000..e62b38a755720fa3b92a923aaa4f29810c2feb63
--- /dev/null
+++ b/mhcflurry/package_metadata.py
@@ -0,0 +1,2 @@
+
+__version__ = "0.0.2"
diff --git a/mhcflurry/paths.py b/mhcflurry/paths.py
index 3e58e57ca951440bb1773989b4de7ce971147a4f..ec9e8377f431a33a4f6f7ec0f5d5538eb7486991 100644
--- a/mhcflurry/paths.py
+++ b/mhcflurry/paths.py
@@ -15,7 +15,10 @@
 from os.path import join
 from appdirs import user_data_dir
 
-BASE_DIRECTORY = user_data_dir("mhcflurry", version="0.1")
+
+# Increment the version of the base directory whenever we make a breaking change
+# to how the data is represented or how the models are serialized.
+BASE_DIRECTORY = user_data_dir("mhcflurry", version="2")
 CLASS1_DATA_DIRECTORY = join(BASE_DIRECTORY, "class1_data")
 CLASS1_MODEL_DIRECTORY = join(BASE_DIRECTORY, "class1_models")
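For context, a minimal sketch (illustration only, not part of the patch) of how the new versioned base directory resolves; the exact location is platform-dependent, typically ~/.local/share/mhcflurry/2 on Linux and ~/Library/Application Support/mhcflurry/2 on macOS:

    from os.path import join
    from appdirs import user_data_dir

    # Same call as in mhcflurry/paths.py above; bumping version="2" to "3"
    # later would point at a fresh directory and leave old data untouched.
    base = user_data_dir("mhcflurry", version="2")
    print(base)                         # e.g. /home/user/.local/share/mhcflurry/2
    print(join(base, "class1_data"))    # CLASS1_DATA_DIRECTORY
    print(join(base, "class1_models"))  # CLASS1_MODEL_DIRECTORY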
 
diff --git a/requirements.txt b/requirements.txt
index 133fb1329d9bdd05d0f36ce3f21848e3a01b0886..d00634728a9c01a217062c96d5299a2da6415526 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ numpy>= 1.7
 pandas>=0.13.1
 appdirs
 theano
-keras<1.0
\ No newline at end of file
+keras<1.0
+h5py
\ No newline at end of file
diff --git a/script/create-combined-class1-dataset.py b/script/create-combined-class1-dataset.py
index 0dfde3881ed61f3840637e0f9bd8ec6bc1144615..421b4de7fac45a85600024197cf1b1a8f8b05e57 100755
--- a/script/create-combined-class1-dataset.py
+++ b/script/create-combined-class1-dataset.py
@@ -26,20 +26,20 @@ from __future__ import (
     absolute_import,
     unicode_literals
 )
-from os.path import join
+from os import makedirs
+from os.path import join, exists
 import pickle
 from collections import Counter
 import argparse
 
 import pandas as pd
 
-from mhcflurry.paths import CLASS1_DATA_DIRECTORY, CLASS1_DATA_CSV_PATH
+from mhcflurry.paths import CLASS1_DATA_DIRECTORY, CLASS1_DATA_CSV_FILENAME
 
 IEDB_PICKLE_FILENAME = "iedb_human_class1_assay_datasets.pickle"
 IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
 
-PETERS_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
-PETERS_CSV_PATH = join(CLASS1_DATA_DIRECTORY, PETERS_CSV_FILENAME)
+KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
 
 parser = argparse.ArgumentParser()
 
@@ -71,13 +71,18 @@ parser.add_argument(
 
 parser.add_argument(
     "--netmhcpan-csv-path",
-    default=PETERS_CSV_PATH,
+    default=KIM_2013_CSV_FILENAME,
     help="Path to CSV with NetMHCpan dataset from 2013 Peters paper")
 
 parser.add_argument(
-    "--output-csv-path",
-    default=CLASS1_DATA_CSV_PATH,
-    help="Path to CSV of combined assay results")
+    "--output-dir",
+    default=CLASS1_DATA_DIRECTORY,
+    help="Path to directory where output CSV should be written")
+
+parser.add_argument(
+    "--output-csv-filename",
+    default=CLASS1_DATA_CSV_FILENAME,
+    help="Name of combined CSV file")
 
 parser.add_argument(
     "--extra-dataset-csv-path",
@@ -88,6 +93,9 @@ parser.add_argument(
 if __name__ == "__main__":
     args = parser.parse_args()
 
+    if not exists(args.output_dir):
+        makedirs(args.output_dir)
+
     print("Reading %s..." % args.iedb_pickle_path)
     with open(args.iedb_pickle_path, "rb") as f:
         iedb_datasets = pickle.load(f)
diff --git a/script/create-iedb-class1-dataset.py b/script/create-iedb-class1-dataset.py
index 0cb910a91fa324a7fee1ba665dab107dd4984ed5..abc88ca79735fb571e3e6a250282f698d49a4dbf 100755
--- a/script/create-iedb-class1-dataset.py
+++ b/script/create-iedb-class1-dataset.py
@@ -19,23 +19,42 @@ Turn a raw CSV snapshot of the IEDB contents into a usable
 class I binding prediction dataset by grouping all unique pMHCs
 """
 from collections import defaultdict
-from os.path import join
+from os import makedirs
+from os.path import join, exists
 import pickle
+import argparse
 
 import numpy as np
 import pandas as pd
 
 from mhcflurry.paths import CLASS1_DATA_DIRECTORY
 
+
 IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv"
-IEDB_SOURCE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_SOURCE_FILENAME)
+PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--input-csv",
+    default=IEDB_SOURCE_FILENAME,
+    help="CSV file with IEDB's MHC binding data")
+
+parser.add_argument(
+    "--output-dir",
+    default=CLASS1_DATA_DIRECTORY,
+    help="Directory to write output pickle file")
 
-OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
-OUTPUT_PATH = join(CLASS1_DATA_DIRECTORY, OUTPUT_FILENAME)
+
+parser.add_argument(
+    "--output-pickle-filename",
+    default=PICKLE_OUTPUT_FILENAME,
+    help="Path to .pickle file containing dictionary of IEDB assay datasets")
 
 if __name__ == "__main__":
+    args = parser.parse_args()
     df = pd.read_csv(
-        IEDB_SOURCE_PATH,
+        args.input_csv,
         error_bad_lines=False,
         encoding="latin-1",
         header=[0, 1])
@@ -48,9 +67,7 @@ if __name__ == "__main__":
         "HLA-A",
         "HLA-B",
         "HLA-C",
-        # "H-2-D",
-        # "H-2-K",
-        # "H-2-L",
+        "H-2",
     ]
     for pattern in patterns:
         pattern_mask = alleles.str.startswith(pattern)
@@ -98,5 +115,10 @@ if __name__ == "__main__":
                 "percent_positive",
                 "count"])
         print("# distinct pMHC entries: %d" % len(columns["mhc"]))
-    with open(OUTPUT_PATH, "wb") as f:
+    if not exists(args.output_dir):
+        makedirs(args.output_dir)
+
+    output_path = join(args.output_dir, args.output_pickle_filename)
+
+    with open(output_path, "wb") as f:
         pickle.dump(assay_dataframes, f, pickle.HIGHEST_PROTOCOL)
diff --git a/script/download-iedb.sh b/script/download-iedb.sh
index 5bdd4ae9b5f387df0ef086141819bdf2c3af86cf..3e60b17964ec23da0505c85c3f7f3caf0d3cf316 100755
--- a/script/download-iedb.sh
+++ b/script/download-iedb.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2016. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,3 @@
 rm -f mhc_ligand_full*
 wget http://www.iedb.org/doc/mhc_ligand_full.zip
 unzip mhc_ligand_full.zip
-DATA_DIR=`python -c "import mhcflurry; print(mhcflurry.paths.CLASS1_DATA_DIRECTORY)"`
-mkdir -p -- "$DATA_DIR"
-mv mhc_ligand_full.csv "$DATA_DIR"
\ No newline at end of file
diff --git a/script/download-kim-2009-dataset.sh b/script/download-kim-2009-dataset.sh
index 3d4e8f436dd06185b355b90162b708844ef981ee..422c50eb6462e268734de45b785d4e99c906e2d0 100755
--- a/script/download-kim-2009-dataset.sh
+++ b/script/download-kim-2009-dataset.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2016. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,6 +18,3 @@
 # 2013 "Dataset size and composition" paper
 rm -f bdata.2009.mhci.public.1*
 wget https://dl.dropboxusercontent.com/u/3967524/bdata.2009.mhci.public.1.txt
-DATA_DIR=`python -c "import mhcflurry; print(mhcflurry.paths.CLASS1_DATA_DIRECTORY)"`
-mkdir -p -- "$DATA_DIR"
-mv bdata.2009.mhci.public.1.txt "$DATA_DIR"
diff --git a/script/download-kim-2013-dataset.sh b/script/download-kim-2013-dataset.sh
index 780e8cf407ec248be7cf8642d8c08a40f2aee797..48e3fa8b91e0a0c0388b64b1471ec2e4d31e4e0e 100755
--- a/script/download-kim-2013-dataset.sh
+++ b/script/download-kim-2013-dataset.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2016. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,3 @@
 # Download dataset from Kim/Peters 2013 "Dataset size and composition" paper
 rm -f bdata.20130222.mhci.public*
 wget https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.public.1.txt
-DATA_DIR=`python -c "import mhcflurry; print(mhcflurry.paths.CLASS1_DATA_DIRECTORY)"`
-mkdir -p -- "$DATA_DIR"
-mv bdata.20130222.mhci.public.1.txt "$DATA_DIR"
diff --git a/script/download-kim-blind-dataset.sh b/script/download-kim-blind-dataset.sh
index 00c2721eb19f2376a93b29bd9e28cc1beaf57a7e..5aa7cc3407473db4eb08958a72ecb7a59d75b8f9 100755
--- a/script/download-kim-blind-dataset.sh
+++ b/script/download-kim-blind-dataset.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2016. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,8 +17,4 @@
 # Download BLIND 2013 dataset from Kim/Peters 2013
 # "Dataset size and composition" paper
 rm -f bdata.2013.mhci.public.blind*
-
 wget https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
-DATA_DIR=`python -c "import mhcflurry; print(mhcflurry.paths.CLASS1_DATA_DIRECTORY)"`
-mkdir -p -- "$DATA_DIR"
-mv bdata.2013.mhci.public.blind.1.txt "$DATA_DIR"
diff --git a/script/print-data-dir.sh b/script/print-data-dir.sh
deleted file mode 100755
index 8d0c2da2908de70a43219d990ee93cd9602c015a..0000000000000000000000000000000000000000
--- a/script/print-data-dir.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-DATA_DIR=`python -c "import mhcflurry; print(mhcflurry.paths.CLASS1_DATA_DIRECTORY)"`
-echo "$DATA_DIR"
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 520bf0ff78afa58442227e5af16ecf08d4ab6da0..b16c5b2efd19156ce2e0619acc3fca40548a59f7 100644
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,7 @@
 
 import os
 import logging
+import re
 
 from setuptools import setup
 
@@ -34,10 +35,17 @@ except:
     logging.warn("Conversion of long_description from MD to RST failed")
     pass
 
+
+with open('mhcflurry/package_metadata.py', 'r') as f:
+    version = re.search(
+        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
+        f.read(),
+        re.MULTILINE).group(1)
+
 if __name__ == '__main__':
     setup(
         name='mhcflurry',
-        version="0.0.1",
+        version=version,
         description="MHC Binding Predictor",
         author="Alex Rubinsteyn",
         author_email="alex {dot} rubinsteyn {at} mssm {dot} edu",
@@ -58,6 +66,7 @@ if __name__ == '__main__':
             'appdirs',
             'theano',
             'keras',
+            'h5py',
             # using for multi-threaded web server
             'cherrypy'
         ],
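For reference, a minimal sketch of the single-source version pattern adopted above: setup.py extracts __version__ from mhcflurry/package_metadata.py with a regular expression rather than importing the package, so building from source does not require the runtime dependencies (numpy, theano, keras) to be importable first, and the same string is exposed at runtime via the new top-level import:

    import re

    # Same extraction logic as in setup.py above, run against the new
    # mhcflurry/package_metadata.py added in this diff.
    with open('mhcflurry/package_metadata.py', 'r') as f:
        version = re.search(
            r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
            f.read(),
            re.MULTILINE).group(1)

    print(version)  # "0.0.2"

    # At runtime the same value is available as mhcflurry.__version__.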