From d7e3af613913b008d7a41a8b35aa9eaccb59ab3b Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Fri, 23 Feb 2018 12:12:50 -0500
Subject: [PATCH] Default to no extra decoys in mass spec model selection

---
 .../select_allele_specific_models_command.py  | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/mhcflurry/select_allele_specific_models_command.py b/mhcflurry/select_allele_specific_models_command.py
index f6cd6e85..01587d76 100644
--- a/mhcflurry/select_allele_specific_models_command.py
+++ b/mhcflurry/select_allele_specific_models_command.py
@@ -482,23 +482,30 @@ class MassSpecModelSelector(object):
             self,
             df,
             predictor,
-            decoys_per_length=5000,
+            decoys_per_length=0,
             min_measurements=100,
             multiply_score_by_data_size=True):
 
-        (min_length, max_length) = predictor.supported_peptide_lengths
-        decoys = []
-        for length in range(min_length, max_length + 1):
-            decoys.extend(
-                random_peptides(decoys_per_length, length=length))
-
         # Index is peptide, columns are alleles
-        hit_matrix = df.groupby(["peptide", "allele"]).measurement_value.count().unstack().fillna(0).astype(bool)
+        hit_matrix = df.groupby(
+            ["peptide", "allele"]).measurement_value.count().unstack().fillna(
+            0).astype(bool)
+
+        if decoys_per_length:
+            (min_length, max_length) = predictor.supported_peptide_lengths
+            decoys = []
+            for length in range(min_length, max_length + 1):
+                decoys.extend(
+                    random_peptides(decoys_per_length, length=length))
+
+            decoy_matrix = pandas.DataFrame(
+                index=decoys, columns=hit_matrix.columns, dtype=bool)
+            decoy_matrix[:] = False
+            full_matrix = pandas.concat([hit_matrix, decoy_matrix])
+        else:
+            full_matrix = hit_matrix
 
-        decoy_matrix = pandas.DataFrame(
-            index=decoys, columns=hit_matrix.columns, dtype=bool)
-        decoy_matrix[:] = False
-        full_matrix = pandas.concat([hit_matrix, decoy_matrix]).sample(frac=1.0)
+        full_matrix = full_matrix.sample(frac=1.0)
 
         self.df = full_matrix
         self.predictor = predictor
-- 
GitLab