From 0b12056c4e44bb59448e0535f04c7e7a1a20d009 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Tue, 7 Jan 2020 18:26:58 -0500 Subject: [PATCH] fix --- .../models_class1_pan_variants/GENERATE.sh | 12 ++++-------- .../reassign_mass_spec_training_data.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/downloads-generation/models_class1_pan_variants/GENERATE.sh b/downloads-generation/models_class1_pan_variants/GENERATE.sh index 13d7aecb..7782d2fa 100755 --- a/downloads-generation/models_class1_pan_variants/GENERATE.sh +++ b/downloads-generation/models_class1_pan_variants/GENERATE.sh @@ -69,8 +69,8 @@ then python generate_hyperparameters.py hyperparameters.production.yaml compact_peptide > hyperparameters.compact_peptide.yaml fi -#VARIANTS=( no_additional_ms_ms_only_0nm ms_only_0nm no_additional_ms_0nm 0nm no_additional_ms ms_only no_pretrain compact_peptide 34mer_sequence single_hidden_no_pretrain affinity_only ) -VARIANTS=( no_additional_ms_ms_only_0nm ms_only_0nm no_additional_ms_0nm 0nm no_additional_ms ms_only no_pretrain compact_peptide 34mer_sequence ) +#VARIANTS=( no_additional_ms_ms_only_0nm ms_only_0nm no_additional_ms_0nm 0nm no_additional_ms no_pretrain compact_peptide 34mer_sequence single_hidden_no_pretrain affinity_only ) +VARIANTS=( no_additional_ms_ms_only_0nm ms_only_0nm no_additional_ms_0nm 0nm no_additional_ms no_pretrain compact_peptide 34mer_sequence ) for kind in "${VARIANTS[@]}" do @@ -102,6 +102,7 @@ do python reassign_mass_spec_training_data.py \ "$(mhcflurry-downloads path data_curated)/curated_training_data.no_additional_ms.csv.bz2" \ --set-measurement-value 0 \ + --drop-negative-ms \ --ms-only \ --out-csv "$TRAINING_DATA" HYPERPARAMETERS=hyperparameters.production.yaml @@ -133,16 +134,11 @@ do python reassign_mass_spec_training_data.py \ "$(mhcflurry-downloads path data_curated)/curated_training_data.mass_spec.csv.bz2" \ --set-measurement-value 0 \ + --drop-negative-ms \ --out-csv "$TRAINING_DATA" HYPERPARAMETERS=hyperparameters.production.yaml fi - if [ "$kind" == "ms_only" ] - then - TRAINING_DATA="$(mhcflurry-downloads path data_curated)/curated_training_data.mass_spec.csv.bz2" - HYPERPARAMETERS=hyperparameters.production.yaml - fi - if [ "$kind" == "affinity_only" ] then TRAINING_DATA="$(mhcflurry-downloads path data_curated)/curated_training_data.affinity.csv.bz2" diff --git a/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py b/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py index 15838419..185eb75f 100644 --- a/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py +++ b/downloads-generation/models_class1_pan_variants/reassign_mass_spec_training_data.py @@ -11,6 +11,7 @@ parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument("data", metavar="CSV", help="Training data") parser.add_argument("--ms-only", action="store_true", default=False) +parser.add_argument("--drop-negative-ms", action="store_true", default=False) parser.add_argument("--set-measurement-value", type=float) parser.add_argument("--out-csv") @@ -21,11 +22,13 @@ def go(args): df = pandas.read_csv(args.data) print(df) - bad = df.loc[ - (df.measurement_kind == "mass_spec") & - (df.measurement_inequality != "<") - ] - assert len(bad) == 0, bad + if args.drop_negative_ms: + bad = df.loc[ + (df.measurement_kind == "mass_spec") & + (df.measurement_inequality != "<") + ] + print("Dropping ", len(bad)) + df = df.loc[~df.index.isin(bad.index)].copy() if args.ms_only: print("Filtering to MS only") -- GitLab