From ccfab5c6784e69758f85e95d5dbba7e3f1ee8ecf Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Tue, 7 Jan 2020 18:02:20 -0500
Subject: [PATCH] add

---
 .../reassign_mass_spec_training_data.py       | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py

diff --git a/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py b/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py
new file mode 100644
index 00000000..0c3625b8
--- /dev/null
+++ b/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py
@@ -0,0 +1,46 @@
+"""
+Reassign affinity values for mass spec data
+"""
+import sys
+import os
+import argparse
+
+import pandas
+
+parser = argparse.ArgumentParser(usage=__doc__)
+# One positional input CSV, plus optional filter / reassign / output flags.
+parser.add_argument("data", metavar="CSV", help="Training data")
+parser.add_argument("--ms-only", action="store_true", default=False)  # MS rows only
+parser.add_argument("--set-measurement-value", type=float)  # new MS value
+parser.add_argument("--out-csv")  # output path; nothing written if omitted
+
+
+def go(args):
+    """Read args.data; optionally keep only mass-spec rows (args.ms_only),
+    overwrite their measurement_value (args.set_measurement_value), and
+    write the result to args.out_csv. Mass-spec rows must all be "<"."""
+    df = pandas.read_csv(args.data)
+    print(df)
+    is_ms = df.measurement_kind == "mass_spec"
+    assert (df.loc[is_ms].measurement_inequality == "<").all()
+
+    if args.ms_only:
+        print("Filtering to MS only")
+        # The column is "measurement_kind"; copy so later writes are safe.
+        df = df.loc[is_ms].copy()
+
+    # Compare to None so an explicit value of 0.0 is still applied.
+    if args.set_measurement_value is not None:
+        indexer = df.measurement_kind == "mass_spec"
+        df.loc[indexer, "measurement_value"] = args.set_measurement_value
+        print("Reassigned:")
+        print(df.loc[indexer])
+
+    if args.out_csv:
+        out_csv = os.path.abspath(args.out_csv)
+        df.to_csv(out_csv, index=False)
+        print("Wrote", out_csv)
+
+
+if __name__ == "__main__":
+    go(parser.parse_args())
-- 
GitLab