diff --git a/.travis.yml b/.travis.yml
index 4a81675a84c60db8eb5fbebf3a24d893b18c021f..5e15afcd37249cb69df8e3aa2408c3c69085289a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -39,14 +39,17 @@ script:
   # download training data
   - script/download-kim-2013-dataset.sh
   - script/download-iedb.sh
-  # only install data for A*01:01, A*02:01, HLA-A*02:05 for testing
-  - script/create-iedb-class1-dataset.py --alleles HLA-A*01:01 HLA-A*02:01 HLA-A*02:05
+  # only install data for A*01:01, A*02:01, HLA-A*02:05, HLA-B*07:02, H-2-KD
+  # for testing
+  - >
+    script/create-iedb-class1-dataset.py
+    --alleles HLA-A*01:01 HLA-A*02:01 HLA-A*02:05 HLA-B*07:02 H-2-KD
   - script/create-combined-class1-dataset.py
-  # only installing A0101, A0201, A0205 for testing purposes and with very limited
-  # training
+  # only installing A0101, A0201, A0205, B0702, H-2-Kd for testing purposes and
+  # with very limited training
   - >
     mhcflurry-train-class1-allele-specific-models.py
-    --alleles HLA-A0101 HLA-A0201 HLA-A0205
+    --alleles HLA-A0101 HLA-A0201 HLA-A0205 HLA-B0702 H-2-KD
     --embedding-size 10
     --hidden-layer-size 10
     --training-epochs 100
""" - return { - allele_name: Dataset(group_df) - for (allele_name, group_df) - in self.to_dataframe().groupby("allele") - } + return dict(self.groupby_allele()) def to_nested_dictionary(self, combine_fn=geometric_mean): """ @@ -187,7 +192,9 @@ class Dataset(object): # dictionary mapping each peptide to a list of affinities peptide_to_affinity_dict = defaultdict(list) peptide_to_weight_dict = defaultdict(list) - for _, peptide, affinity, sample_weight in allele_dataset.itertuples(): + for (allele, peptide), row in allele_dataset.iterrows(): + affinity = row["affinity"] + sample_weight = row["sample_weight"] peptide_to_affinity_dict[peptide].append(affinity) peptide_to_weight_dict[peptide].append(sample_weight) allele_to_peptide_to_affinity_dict[allele] = { @@ -196,6 +203,7 @@ class Dataset(object): peptide_to_weight_dict[peptide]) for peptide in peptide_to_affinity_dict.keys() } + return allele_to_peptide_to_affinity_dict @classmethod def from_sequences( @@ -254,7 +262,8 @@ class Dataset(object): @classmethod def from_nested_dictionary( - cls, allele_to_peptide_to_affinity_dict): + cls, + allele_to_peptide_to_affinity_dict): """ Given nested dictionaries mapping allele -> peptide -> affinity, construct a Dataset with uniform sample weights. @@ -272,6 +281,13 @@ class Dataset(object): peptides=peptides, affinities=affinities) + @classmethod + def create_empty(cls): + """ + Returns an empty Dataset containing no pMHC entries. + """ + return cls.from_nested_dictionary({}) + @classmethod def from_single_allele_dictionary( cls, @@ -281,8 +297,7 @@ class Dataset(object): Given a peptide->affinity dictionary for a single allele, create a Dataset. """ - return cls.from_allele_to_peptide_to_affinity_dictionary( - {allele_name: peptide_to_affinity_dict}) + return cls.from_nested_dictionary({allele_name: peptide_to_affinity_dict}) @classmethod def from_csv( @@ -424,11 +439,18 @@ class Dataset(object): allow_unknown_amino_acids=allow_unknown_amino_acids) original_peptide_indices = np.asarray(original_peptide_indices) counts = np.asarray(counts) - assert len(original_peptide_indices) == len(self.affinities) - assert len(counts) == len(self.affinities) kmer_affinities = self.affinities[original_peptide_indices] - sample_weights = 1.0 / counts - return X_index, kmer_affinities, sample_weights, original_peptide_indices + kmer_sample_weights = self.sample_weights[original_peptide_indices] + + assert len(original_peptide_indices) == len(kmer_affinities) + assert len(counts) == len(kmer_affinities) + assert len(kmer_sample_weights) == len(kmer_affinities) + + # combine the original sample weights of varying length peptides + # with a 1/n_kmers factor for the number of kmers pulled out of each + # original peptide + combined_sample_weights = kmer_sample_weights * (1.0 / counts) + return X_index, kmer_affinities, combined_sample_weights, original_peptide_indices def to_dense_pMHC_affinity_matrix(self): """ @@ -442,11 +464,14 @@ class Dataset(object): n_peptides = len(peptides_list) alleles_list = list(sorted(self.unique_alleles())) allele_order = {a: i for (i, a) in enumerate(alleles_list)} - n_alleles = alleles_list - X = np.ones((n_peptides, n_alleles)) + n_alleles = len(alleles_list) + shape = (n_peptides, n_alleles) + X = np.ones(shape, dtype=float) for (allele, allele_dict) in allele_to_peptide_to_affinity_dict.items(): + print(allele, allele_dict) column_index = allele_order[allele] for (peptide, affinity) in allele_dict.items(): + print(peptide, affinity) row_index = peptide_order[peptide] X[row_index, 
diff --git a/mhcflurry/imputation.py b/mhcflurry/imputation.py
index bbccb6812b82f18ea5152d953ec88d6b7b472302..c9bf9c6e95243824b3b055e1b65a52634476fd98 100644
--- a/mhcflurry/imputation.py
+++ b/mhcflurry/imputation.py
@@ -138,6 +138,7 @@ def create_imputed_datasets(
     Returns dictionary mapping allele names to AlleleData objects containing
     imputed affinity values.
     """
+    assert isinstance(dataset, Dataset), "Expected Dataset, got %s" % (type(dataset),)
     X_incomplete, peptide_list, allele_list = dataset.to_dense_pMHC_affinity_matrix()
     _check_dense_pMHC_array(X_incomplete, peptide_list, allele_list)
 
diff --git a/test/test_dataset.py b/test/test_dataset.py
index da551d20cb954268a6072872aada54bfefb18908..29dd612151494ba20780725ea132cb7753f9541a 100644
--- a/test/test_dataset.py
+++ b/test/test_dataset.py
@@ -6,7 +6,7 @@ def test_create_allele_data_from_single_allele_dict():
         ("A" * 10): 1.2,
         ("C" * 9): 1000,
     }
-    dataset = Dataset.from_peptide_to_affinity_dictionary(
+    dataset = Dataset.from_single_allele_dictionary(
         allele_name="A0201",
         peptide_to_affinity_dict=peptide_to_ic50_dict)
     assert isinstance(dataset, Dataset)
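
The new assert in create_imputed_datasets makes its Dataset-only contract explicit, and the test rename
matches the from_single_allele_dictionary constructor kept above. A hedged sketch of the imputation entry
point that the tests in the next file exercise; the alleles and affinities are copied from those tests,
and MICE comes from the fancyimpute package already imported there:

    from fancyimpute import MICE
    from mhcflurry.dataset import Dataset
    from mhcflurry.imputation import create_imputed_datasets

    dataset = Dataset.from_nested_dictionary({
        "HLA-A*02:01": {"A" * 9: 20.0, "C" * 9: 40000.0, "S" * 9: 500.0},
        "HLA-A*02:05": {"S" * 9: 500.0, "A" * 9: 25.0},
    })

    # create_imputed_datasets rejects anything that is not a Dataset (see the
    # assert above) and imputes the missing peptide/allele affinities with the
    # supplied fancyimpute imputer.
    imputed = create_imputed_datasets(dataset, imputer=MICE(n_imputations=25))
    for allele_name, allele_dataset in imputed.groupby_allele():
        print(allele_name, set(allele_dataset.peptides))
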
print("Without imputation, # samples = %d, # original peptides = %d" % ( - len(a0205_data_without_imputation.peptides), - len(set(a0205_data_without_imputation.original_peptides)))) - - print(set(a0205_data_without_imputation.original_peptides)) - X_index = a0205_data_without_imputation.X_index - Y_true = a0205_data_without_imputation.Y - sample_weights = a0205_data_without_imputation.weights - + X_index, Y_true, sample_weights, _ = \ + a0205_data_without_imputation.kmer_index_encoding() predictor_without_imputation.fit_kmer_encoded_arrays( X=X_index, Y=Y_true, @@ -64,20 +57,14 @@ def test_performance_improves_for_A0205_with_pretraining(): Y_pred_without_imputation = \ predictor_without_imputation.predict_scores_for_kmer_array(X_index) - print("Y_pred without imputation: %s" % (Y_pred_without_imputation,)) - mse_without_imputation = np.mean((Y_true - Y_pred_without_imputation) ** 2) - print("MSE w/out imputation: %f" % mse_without_imputation) + diff_squared = (Y_true - Y_pred_without_imputation) ** 2 + mse_without_imputation = (diff_squared * sample_weights) / sample_weights.sum() - imputed_data_dict = create_imputed_datasets( - allele_data_dict, MICE(n_imputations=25)) - a0205_data_with_imputation = imputed_data_dict["HLA-A0205"] - print("Imputed data, # samples = %d, # original peptides = %d" % ( - len(a0205_data_with_imputation.peptides), - len(set(a0205_data_with_imputation.original_peptides)))) + imputed_datset = dataset.impute(MICE(n_imputations=25)) + a0205_data_with_imputation = imputed_datset.get_allele("HLA-A0205") - X_index_imputed = a0205_data_with_imputation.X_index - Y_imputed = a0205_data_with_imputation.Y - sample_weights_imputed = a0205_data_with_imputation.weights + X_index_imputed, Y_imputed, sample_weights_imputed, _ = \ + a0205_data_with_imputation.kmer_index_encoding() predictor_with_imputation = \ Class1BindingPredictor.from_hyperparameters(name="A0205-impute") @@ -93,7 +80,9 @@ def test_performance_improves_for_A0205_with_pretraining(): Y_pred_with_imputation = \ predictor_with_imputation.predict_scores_for_kmer_array(X_index) - mse_with_imputation = np.mean((Y_true - Y_pred_with_imputation) ** 2) + + diff_squared = (Y_true - Y_pred_with_imputation) ** 2 + mse_with_imputation = (diff_squared * sample_weights_imputed) / sample_weights_imputed.sum() print("MSE w/ imputation: %f" % (mse_with_imputation,)) assert mse_with_imputation < mse_without_imputation, \