From bbfd350d889a2cb7f2871aa5380bac7a61f7af05 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 25 Jan 2018 19:32:14 -0500
Subject: [PATCH] support for inequalities in train command

---
 .../models_class1_no_mass_spec/GENERATE.sh           |  4 +++-
 mhcflurry/train_allele_specific_models_command.py    | 12 ++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/downloads-generation/models_class1_no_mass_spec/GENERATE.sh b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
index 3b80f704..00a6233b 100755
--- a/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
+++ b/downloads-generation/models_class1_no_mass_spec/GENERATE.sh
@@ -36,7 +36,9 @@ time mhcflurry-class1-train-allele-specific-models \
     --hyperparameters hyperparameters.yaml \
     --out-models-dir models \
     --percent-rank-calibration-num-peptides-per-length 1000000 \
-    --min-measurements-per-allele 75
+    --min-measurements-per-allele 75 \
+    --ignore-inequalities
+
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py
index b095e6a4..8d63b4f3 100644
--- a/mhcflurry/train_allele_specific_models_command.py
+++ b/mhcflurry/train_allele_specific_models_command.py
@@ -52,6 +52,11 @@ parser.add_argument(
     action="store_true",
     default=False,
     help="Use only quantitative training data")
+parser.add_argument(
+    "--ignore-inequalities",
+    action="store_true",
+    default=False,
+    help="Do not use affinity value inequalities even when present in data")
 parser.add_argument(
     "--percent-rank-calibration-num-peptides-per-length",
     type=int,
@@ -115,6 +120,10 @@ def run(argv=sys.argv[1:]):
         ]
         print("Subselected to quantitative: %s" % (str(df.shape)))
 
+    if args.ignore_inequalities and "measurement_inequality" in df.columns:
+        print("Dropping measurement_inequality column")
+        del df["measurement_inequality"]
+
     allele_counts = df.allele.value_counts()
 
     if args.allele:
@@ -263,6 +272,9 @@ def process_work(
         allele=allele,
         peptides=train_data.peptide.values,
         affinities=train_data.measurement_value.values,
+        inequalities=(
+            train_data.measurement_inequality.values
+            if "measurement_inequality" in train_data.columns else None),
         models_dir_for_save=save_to,
         progress_preamble=progress_preamble,
         verbose=verbose)
-- 
GitLab