From d7e3af613913b008d7a41a8b35aa9eaccb59ab3b Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Fri, 23 Feb 2018 12:12:50 -0500 Subject: [PATCH] Default to no extra decoys in mass spec model selection --- .../select_allele_specific_models_command.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/mhcflurry/select_allele_specific_models_command.py b/mhcflurry/select_allele_specific_models_command.py index f6cd6e85..01587d76 100644 --- a/mhcflurry/select_allele_specific_models_command.py +++ b/mhcflurry/select_allele_specific_models_command.py @@ -482,23 +482,30 @@ class MassSpecModelSelector(object): self, df, predictor, - decoys_per_length=5000, + decoys_per_length=0, min_measurements=100, multiply_score_by_data_size=True): - (min_length, max_length) = predictor.supported_peptide_lengths - decoys = [] - for length in range(min_length, max_length + 1): - decoys.extend( - random_peptides(decoys_per_length, length=length)) - # Index is peptide, columns are alleles - hit_matrix = df.groupby(["peptide", "allele"]).measurement_value.count().unstack().fillna(0).astype(bool) + hit_matrix = df.groupby( + ["peptide", "allele"]).measurement_value.count().unstack().fillna( + 0).astype(bool) + + if decoys_per_length: + (min_length, max_length) = predictor.supported_peptide_lengths + decoys = [] + for length in range(min_length, max_length + 1): + decoys.extend( + random_peptides(decoys_per_length, length=length)) + + decoy_matrix = pandas.DataFrame( + index=decoys, columns=hit_matrix.columns, dtype=bool) + decoy_matrix[:] = False + full_matrix = pandas.concat([hit_matrix, decoy_matrix]) + else: + full_matrix = hit_matrix - decoy_matrix = pandas.DataFrame( - index=decoys, columns=hit_matrix.columns, dtype=bool) - decoy_matrix[:] = False - full_matrix = pandas.concat([hit_matrix, decoy_matrix]).sample(frac=1.0) + full_matrix = full_matrix.sample(frac=1.0) self.df = full_matrix self.predictor = predictor -- GitLab