From 73a1ce08750a4b9843b3d7b0297e3e150a915181 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 25 Jan 2018 19:49:14 -0500
Subject: [PATCH] update comments; rename --include-mass-spec to --include-iedb-mass-spec

---
 downloads-generation/data_curated/GENERATE.sh | 4 ++++
 downloads-generation/data_curated/curate.py   | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh
index 1f2b7067..11eb52ce 100755
--- a/downloads-generation/data_curated/GENERATE.sh
+++ b/downloads-generation/data_curated/GENERATE.sh
@@ -30,6 +30,7 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
 
 cp $SCRIPT_DIR/curate.py .
 
+# Without any mass-spec data (writes curated_training_data.no_mass_spec.csv)
 time python curate.py \
     --data-iedb \
         "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
@@ -37,6 +38,9 @@ time python curate.py \
         "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
     --out-csv curated_training_data.no_mass_spec.csv
 
+# With mass-spec data
+# Note that we STILL drop the mass-spec observations that come from IEDB here,
+# since those observations seem to be low-quality.
 time python curate.py \
     --data-iedb \
         "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py
index fbb503a4..32f4a854 100755
--- a/downloads-generation/data_curated/curate.py
+++ b/downloads-generation/data_curated/curate.py
@@ -40,7 +40,7 @@ parser.add_argument(
     default=[],
     help="Path to Abelin Immunity 2017 mass-spec hits")
 parser.add_argument(
-    "--include-mass-spec",
+    "--include-iedb-mass-spec",
     action="store_true",
     default=False,
     help="Include mass-spec observations in IEDB")
@@ -237,7 +237,7 @@ def run():
 
     dfs = []
     for filename in args.data_iedb:
-        df = load_data_iedb(filename, include_mass_spec=args.include_mass_spec)
+        df = load_data_iedb(filename, include_mass_spec=args.include_iedb_mass_spec)
         dfs.append(df)
     for filename in args.data_kim2014:
         df = load_data_kim2014(filename)
-- 
GitLab