From 629baf6020432b086a7aaab8fbcbdf7de2432a53 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn <alex.rubinsteyn@gmail.com> Date: Wed, 11 May 2016 18:36:22 -0400 Subject: [PATCH] small fixes --- ...llele_specific_kmer_ic50_predictor_base.py | 1 + mhcflurry/common.py | 2 +- mhcflurry/dataset.py | 21 ++++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/mhcflurry/class1_allele_specific_kmer_ic50_predictor_base.py b/mhcflurry/class1_allele_specific_kmer_ic50_predictor_base.py index 5f1886c6..8d2304d1 100644 --- a/mhcflurry/class1_allele_specific_kmer_ic50_predictor_base.py +++ b/mhcflurry/class1_allele_specific_kmer_ic50_predictor_base.py @@ -44,6 +44,7 @@ class Class1AlleleSpecificKmerIC50PredictorBase(IC50PredictorBase): max_ic50=MAX_IC50, kmer_size=9): IC50PredictorBase.__init__( + self, name=name, max_ic50=max_ic50, verbose=verbose) diff --git a/mhcflurry/common.py b/mhcflurry/common.py index 0849f238..318c84d2 100644 --- a/mhcflurry/common.py +++ b/mhcflurry/common.py @@ -75,7 +75,7 @@ def geometric_mean(xs, weights=None): if len(xs) == 1: return xs[0] elif weights is None: - return exp(sum(log(xi) for xi in xs)) + return exp(sum(log(xi) for xi in xs) / len(xs)) sum_weighted_log = sum(log(xi) * wi for (xi, wi) in zip(xs, weights)) denom = sum(weights) return exp(sum_weighted_log / denom) diff --git a/mhcflurry/dataset.py b/mhcflurry/dataset.py index ea0dc87d..886d5ca3 100644 --- a/mhcflurry/dataset.py +++ b/mhcflurry/dataset.py @@ -97,28 +97,28 @@ class Dataset(object): """ Array of peptides from pMHC measurements. """ - return self._peptides + return self._df["peptide"].values @property def alleles(self): """ Array of MHC allele names from pMHC measurements. """ - return self._alleles + return self.to_dataframe()["allele"].values @property def affinities(self): """ Array of affinities from pMHC measurements. """ - return self._affinities + return self.to_dataframe()["affinity"].values @property def sample_weights(self): """ Array of sample weights for each pMHC measurement. """ - return self._sample_weights + return self.to_dataframe()["sample_weight"].values def __len__(self): return len(self.to_dataframe()) @@ -136,18 +136,14 @@ class Dataset(object): return False elif len(self) != len(other): return False - elif len(self.columns) != len(other.columns): + elif list(self.columns) != list(other.columns): return False - for ci, cj in zip(self.columns, other.columns): - if ci != cj: - return False - self_df = self.to_dataframe() other_df = other.to_dataframe() for column_name in self.columns: - if not (self_df[column_name] == other_df[column_name]).all(): + if (self_df[column_name] != other_df[column_name]).any(): return False return True @@ -373,6 +369,11 @@ class Dataset(object): Create zero or more peptides from each pMHC entry. The affinity of all new peptides is identical to the original, but sample weights are divided across the number of new peptides. + + Parameters + ---------- + peptide_fn : function + Maps each peptide to a list of peptides. """ columns = self.to_dataframe().columns new_data_dict = OrderedDict( -- GitLab