diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh index 1f2b7067488963912dea57932ff60f36fa55ac42..11eb52ce42dc8a26e34e2b659e76f70559c35eca 100755 --- a/downloads-generation/data_curated/GENERATE.sh +++ b/downloads-generation/data_curated/GENERATE.sh @@ -30,6 +30,7 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME cp $SCRIPT_DIR/curate.py . +# No mass-spec data time python curate.py \ --data-iedb \ "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \ @@ -37,6 +38,9 @@ time python curate.py \ "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \ --out-csv curated_training_data.no_mass_spec.csv +# With mass-spec data +# Note that we STILL drop mass-spec data from IEDB here, since this data seems +# low-quality. time python curate.py \ --data-iedb \ "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \ diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py index fbb503a471cf91b36b58e089435794deee1946a2..32f4a8540ad3ab39843a97f5f1d9374a7c004786 100755 --- a/downloads-generation/data_curated/curate.py +++ b/downloads-generation/data_curated/curate.py @@ -40,7 +40,7 @@ parser.add_argument( default=[], help="Path to Abelin Immunity 2017 mass-spec hits") parser.add_argument( - "--include-mass-spec", + "--include-iedb-mass-spec", action="store_true", default=False, help="Include mass-spec observations in IEDB") @@ -237,7 +237,7 @@ def run(): dfs = [] for filename in args.data_iedb: - df = load_data_iedb(filename, include_mass_spec=args.include_mass_spec) + df = load_data_iedb(filename, include_mass_spec=args.include_iedb_mass_spec) dfs.append(df) for filename in args.data_kim2014: df = load_data_kim2014(filename)