From b4f63aea7187f0553248b8fbb17214fd8831c461 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Fri, 16 Feb 2018 11:40:26 -0500 Subject: [PATCH] update --- mhcflurry/class1_affinity_predictor.py | 5 ++--- mhcflurry/encodable_sequences.py | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py index c6521046..ea4939b4 100644 --- a/mhcflurry/class1_affinity_predictor.py +++ b/mhcflurry/class1_affinity_predictor.py @@ -821,9 +821,7 @@ class Class1AffinityPredictor(object): raise ValueError("Must specify 'allele' or 'alleles'.") peptides = EncodableSequences.create(peptides) - df = pandas.DataFrame({ - 'peptide': peptides.sequences, - }) + df = peptides.sequences_df.rename(columns={'sequence': 'peptide'}) if allele is not None: if alleles is not None: @@ -948,6 +946,7 @@ class Class1AffinityPredictor(object): ] columns.remove("normalized_allele") columns.remove("supported_peptide_length") + columns.remove("sequence_length") if include_percentile_ranks: if self.allele_to_percent_rank_transform: diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py index 6dcd5e63..da4cf9dc 100644 --- a/mhcflurry/encodable_sequences.py +++ b/mhcflurry/encodable_sequences.py @@ -35,7 +35,11 @@ class EncodableSequences(object): def __init__(self, sequences): if not all(isinstance(obj, string_types) for obj in sequences): raise ValueError("Sequence of strings is required") - self.sequences = numpy.array(sequences) + self.sequences_df = pandas.DataFrame({ + "sequence": numpy.array(sequences), + "sequence_length": numpy.array(sequences), + }) + self.sequences = self.sequences_df.sequence.values self.encoding_cache = {} self.fixed_sequence_length = None if len(self.sequences) > 0 and all( -- GitLab