Skip per-peptide length validation when every peptide is within the supported
length range, and skip building the allele/length mask when a single allele is
queried and all lengths are supported.

Review notes:
  * The out-of-range branch measures peptide lengths with
    df.peptide.str.len(); a pandas Series has no .len() method, so the
    previous df.sequence_length.len() raised AttributeError.
  * The offending peptides are selected with df.loc[...] because the
    deprecated df.ix indexer was removed in pandas 1.0.
  * The "index" line below still carries the original post-image blob id.
  * NOTE(review): EncodableSequences.min_length / max_length assume the
    "sequence_length" column holds integer lengths - confirm against the
    constructor, since the surrounding context builds that column from the
    raw sequences.

diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index ea4939b4f2343e403e62c5176654e2e0fc256979..2ad081f6cd155bf52c6cbc3554b67662ebd9a700 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -848,20 +848,29 @@ class Class1AffinityPredictor(object):
 
         (min_peptide_length, max_peptide_length) = (
             self.supported_peptide_lengths)
-        df["supported_peptide_length"] = (
-            (df.peptide.str.len() >= min_peptide_length) &
-            (df.peptide.str.len() <= max_peptide_length))
-        if (~df.supported_peptide_length).any():
-            msg = (
-                "%d peptides have lengths outside of supported range [%d, %d]: "
-                "%s" % (
-                    (~df.supported_peptide_length).sum(),
-                    min_peptide_length,
-                    max_peptide_length,
-                    str(df.ix[~df.supported_peptide_length].peptide.unique())))
-            logging.warning(msg)
-            if throw:
-                raise ValueError(msg)
+
+        if (peptides.min_length < min_peptide_length or
+                peptides.max_length > max_peptide_length):
+            # Only compute this if needed
+            all_peptide_lengths_supported = False
+            df["supported_peptide_length"] = (
+                (df.peptide.str.len() >= min_peptide_length) &
+                (df.peptide.str.len() <= max_peptide_length))
+            if (~df.supported_peptide_length).any():
+                msg = (
+                    "%d peptides have lengths outside of supported range [%d, %d]: "
+                    "%s" % (
+                        (~df.supported_peptide_length).sum(),
+                        min_peptide_length,
+                        max_peptide_length,
+                        str(df.loc[~df.supported_peptide_length].peptide.unique())))
+                logging.warning(msg)
+                if throw:
+                    raise ValueError(msg)
+        else:
+            # Handle common case efficiently.
+            df["supported_peptide_length"] = True
+            all_peptide_lengths_supported = True
 
         if self.class1_pan_allele_models:
             unsupported_alleles = [
@@ -904,12 +913,16 @@ class Class1AffinityPredictor(object):
                 logging.warning(msg)
                 if throw:
                     raise ValueError(msg)
+
             for allele in query_alleles:
                 models = self.allele_to_allele_specific_models.get(allele, [])
-                mask = (
-                    (df.normalized_allele == allele) &
-                    df.supported_peptide_length).values
-                if mask.all():
+                if len(query_alleles) == 1 and all_peptide_lengths_supported:
+                    mask = None
+                else:
+                    mask = (
+                        (df.normalized_allele == allele) &
+                        df.supported_peptide_length).values
+                if mask is None or mask.all():
                     # Common case optimization
                     for (i, model) in enumerate(models):
                         df["model_single_%d" % i] = model.predict(peptides)
diff --git a/mhcflurry/encodable_sequences.py b/mhcflurry/encodable_sequences.py
index da4cf9dc1ee5de10a48ca10b393e8cf05f7be431..efe29c1477cd693792ede926a0751c752dc25a7a 100644
--- a/mhcflurry/encodable_sequences.py
+++ b/mhcflurry/encodable_sequences.py
@@ -39,6 +39,8 @@ class EncodableSequences(object):
             "sequence": numpy.array(sequences),
             "sequence_length": numpy.array(sequences),
         })
+        self.min_length = self.sequences_df.sequence_length.min()
+        self.max_length = self.sequences_df.sequence_length.max()
         self.sequences = self.sequences_df.sequence.values
         self.encoding_cache = {}
         self.fixed_sequence_length = None