From 73a1ce08750a4b9843b3d7b0297e3e150a915181 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Thu, 25 Jan 2018 19:49:14 -0500 Subject: [PATCH] update comment --- downloads-generation/data_curated/GENERATE.sh | 4 ++++ downloads-generation/data_curated/curate.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh index 1f2b7067..11eb52ce 100755 --- a/downloads-generation/data_curated/GENERATE.sh +++ b/downloads-generation/data_curated/GENERATE.sh @@ -30,6 +30,7 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME cp $SCRIPT_DIR/curate.py . +# No mass-spec data time python curate.py \ --data-iedb \ "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \ @@ -37,6 +38,9 @@ time python curate.py \ "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \ --out-csv curated_training_data.no_mass_spec.csv +# With mass-spec data +# Note that we STILL drop mass-spec data from IEDB here, since this data seems +# low-quality. time python curate.py \ --data-iedb \ "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \ diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py index fbb503a4..32f4a854 100755 --- a/downloads-generation/data_curated/curate.py +++ b/downloads-generation/data_curated/curate.py @@ -40,7 +40,7 @@ parser.add_argument( default=[], help="Path to Abelin Immunity 2017 mass-spec hits") parser.add_argument( - "--include-mass-spec", + "--include-iedb-mass-spec", action="store_true", default=False, help="Include mass-spec observations in IEDB") @@ -237,7 +237,7 @@ def run(): dfs = [] for filename in args.data_iedb: - df = load_data_iedb(filename, include_mass_spec=args.include_mass_spec) + df = load_data_iedb(filename, include_mass_spec=args.include_iedb_mass_spec) dfs.append(df) for filename in args.data_kim2014: df = load_data_kim2014(filename) -- GitLab