diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py index 2bacd7ba2cef84f352a42e71861f7a8877db13a3..a691979c7911a35704a34a377d639f8ca07ad7eb 100644 --- a/mhcflurry/class1_affinity_predictor.py +++ b/mhcflurry/class1_affinity_predictor.py @@ -986,9 +986,6 @@ class Class1AffinityPredictor(object): df["prediction_low"] = numpy.exp(numpy.percentile(logs, 5.0, axis=1)) df["prediction_high"] = numpy.exp(numpy.percentile(logs, 95.0, axis=1)) - del df["normalized_allele"] - del df["supported_peptide_length"] - if include_individual_model_predictions: for i in range(num_pan_models): df["model_pan_%d" % i] = predictions_array[:, i] @@ -1006,6 +1003,9 @@ class Class1AffinityPredictor(object): throw=throw) else: warnings.warn("No percentile rank information available.") + + del df["supported_peptide_length"] + del df["normalized_allele"] return df @staticmethod diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py index 78b88cb8925bce9e3ee790b6dd93348f00009b0b..47d62f886e3bca21753c962954deae6869f89bbe 100644 --- a/mhcflurry/encodable_sequences.py +++ b/mhcflurry/encodable_sequences.py @@ -35,13 +35,12 @@ class EncodableSequences(object): def __init__(self, sequences): if not all(isinstance(obj, string_types) for obj in sequences): raise ValueError("Sequence of strings is required") - self.sequences_df = pandas.DataFrame({ - "sequence": numpy.array(sequences), - }) - self.sequences_df["sequence_length"] = self.sequences_df.sequence.str.len() - self.min_length = self.sequences_df.sequence_length.min() - self.max_length = self.sequences_df.sequence_length.max() - self.sequences = self.sequences_df.sequence.values + self.sequences = numpy.array(sequences) + lengths = pandas.Series(self.sequences).str.len() + + self.min_length = lengths.min() + self.max_length = lengths.max() + self.encoding_cache = {} self.fixed_sequence_length = None if len(self.sequences) > 0 and all(