diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index c6521046efe8273e2ace3d0de1595cac1b75adea..ea4939b4f2343e403e62c5176654e2e0fc256979 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -821,9 +821,7 @@ class Class1AffinityPredictor(object):
             raise ValueError("Must specify 'allele' or 'alleles'.")
 
         peptides = EncodableSequences.create(peptides)
-        df = pandas.DataFrame({
-            'peptide': peptides.sequences,
-        })
+        df = peptides.sequences_df.rename(columns={'sequence': 'peptide'})
 
         if allele is not None:
             if alleles is not None:
@@ -948,6 +946,7 @@ class Class1AffinityPredictor(object):
             ]
         columns.remove("normalized_allele")
         columns.remove("supported_peptide_length")
+        columns.remove("sequence_length")
 
         if include_percentile_ranks:
             if self.allele_to_percent_rank_transform:
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index 6dcd5e6328009baa6e4e3da4a806449f3fa83cfd..da4cf9dc1ee5de10a48ca10b393e8cf05f7be431 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -35,7 +35,11 @@ class EncodableSequences(object):
     def __init__(self, sequences):
         if not all(isinstance(obj, string_types) for obj in sequences):
             raise ValueError("Sequence of strings is required")
-        self.sequences = numpy.array(sequences)
+        self.sequences_df = pandas.DataFrame({
+            "sequence": numpy.array(sequences),
+            "sequence_length": numpy.array([len(s) for s in sequences]),
+        })
+        self.sequences = self.sequences_df.sequence.values
         self.encoding_cache = {}
         self.fixed_sequence_length = None
         if len(self.sequences) > 0 and all(