"""
Reassign affinity values for mass spec data
"""
import sys
import os
import argparse

import pandas

# NOTE: the module docstring doubles as the argparse usage string, so editing
# it changes what the command-line help prints.
parser = argparse.ArgumentParser(usage=__doc__)

parser.add_argument("data", metavar="CSV", help="Training data")
parser.add_argument(
    "--ms-only",
    action="store_true",
    default=False,
    help="Keep only rows whose measurement_kind is 'mass_spec'")
parser.add_argument(
    "--set-measurement-value",
    type=float,
    help="Value to assign to measurement_value for mass_spec rows")
parser.add_argument(
    "--out-csv",
    help="Path to write the resulting CSV to")


def go(args):
    """
    Load the training data, optionally filter and reassign, and write it out.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``data`` (path of the input CSV), ``ms_only`` (bool),
        ``set_measurement_value`` (float or None) and ``out_csv`` (path or
        None). The input CSV must have ``measurement_kind`` and
        ``measurement_inequality`` columns.

    Raises
    ------
    ValueError
        If any row with measurement_kind "mass_spec" has a
        measurement_inequality other than "<".
    """
    df = pandas.read_csv(args.data)
    print(df)

    # Explicit check rather than `assert`, so it still runs under `python -O`.
    is_mass_spec = df.measurement_kind == "mass_spec"
    if not (df.loc[is_mass_spec].measurement_inequality == "<").all():
        raise ValueError(
            "Expected measurement_inequality '<' for all mass_spec rows")

    if args.ms_only:
        print("Filtering to MS only")
        # Filter on measurement_kind, the same column used everywhere else in
        # this script. The copy keeps the later assignment from writing into
        # a slice of the original frame.
        df = df.loc[df.measurement_kind == "mass_spec"].copy()

    # Compare against None so that an explicit value of 0 is honoured.
    if args.set_measurement_value is not None:
        indexer = df.measurement_kind == "mass_spec"
        df.loc[indexer, "measurement_value"] = args.set_measurement_value
        print("Reassigned:")
        print(df.loc[indexer])

    if args.out_csv:
        out_csv = os.path.abspath(args.out_csv)
        df.to_csv(out_csv, index=False)
        print("Wrote", out_csv)


if __name__ == "__main__":
    go(parser.parse_args(sys.argv[1:]))