From 0805397e0e1884f1f822675acd4be271ade3c086 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Sun, 6 Oct 2019 21:46:41 -0400 Subject: [PATCH] fix --- .../data_mass_spec_benchmark/GENERATE.sh | 55 +++++++++---------- .../run_predictors.py | 16 ++++-- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh index f42013b3..fbb1db71 100755 --- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh +++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh @@ -98,8 +98,7 @@ fi # Write out and process peptides. # First just chr1 peptides, then all peptides. # TODO: switch this back -#for subset in chr1 all -for subset in all chr1 +for subset in chr1 all do if [ "$2" == "reuse-all" ] then @@ -120,30 +119,32 @@ do fi # Run netmhcpan4 - OUT_DIR=predictions/${subset}.netmhcpan4 - REUSE1="" - REUSE2="" - if [ "$subset" == "all" ] - then - #REUSE1="predictions/chr1.netmhcpan4" - # TODO: switch this back - REUSE1="$EXISTING_DATA"/predictions/chr1.netmhcpan4 - fi - if [ "${2:-reuse-none}" != "reuse-none" ] - then - REUSE2="$EXISTING_DATA"/$OUT_DIR - fi + for kind in el ba + do + OUT_DIR=predictions/${subset}.netmhcpan4.$kind + REUSE1="" + REUSE2="" + if [ "$subset" == "all" ] + then + REUSE1="predictions/chr1.netmhcpan4.$kind" + fi + if [ "${2:-reuse-none}" != "reuse-none" ] + then + REUSE2="$EXISTING_DATA"/$OUT_DIR + fi + + python run_predictors.py \ + proteome_peptides.$subset.csv.bz2 \ + --result-dtype "float16" \ + --predictor netmhcpan4-$kind \ + --chunk-size 10000 \ + --allele $(cat alleles.txt) \ + --out "$OUT_DIR" \ + --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ + --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \ + --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS + done - python run_predictors.py \ - proteome_peptides.$subset.csv.bz2 \ - --result-dtype "float16" \ - --predictor netmhcpan4 \ - --chunk-size 10000 \ - --allele $(cat alleles.txt) \ - --out "$OUT_DIR" \ - --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ - --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \ - --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS # Run MHCflurry for kind in with_mass_spec no_mass_spec @@ -153,9 +154,7 @@ do REUSE2="" if [ "$subset" == "all" ] then - #REUSE1="predictions/chr1.mhcflurry.${kind}" - # TODO: switch this back - REUSE1="$EXISTING_DATA"/predictions/chr1.mhcflurry.${kind} + REUSE1="predictions/chr1.mhcflurry.${kind}" fi if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ] then diff --git a/downloads-generation/data_mass_spec_benchmark/run_predictors.py b/downloads-generation/data_mass_spec_benchmark/run_predictors.py index d4324737..be034ba9 100644 --- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py +++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py @@ -43,7 +43,7 @@ parser.add_argument( parser.add_argument( "--predictor", required=True, - choices=("mhcflurry", "netmhcpan4")) + choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el")) parser.add_argument( "--mhcflurry-models-dir", metavar="DIR", @@ -88,7 +88,8 @@ add_cluster_parallelism_args(parser) PREDICTOR_TO_COLS = { "mhcflurry": ["affinity"], - "netmhcpan4": ["affinity", "percentile_rank", "elution_score"], + "netmhcpan4-ba": ["affinity", "percentile_rank"], + "netmhcpan4-el": ["elution_score"], } @@ -376,9 +377,16 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None): result = {} results.append((work_item_num, result)) - if predictor_name == "netmhcpan4": + if predictor_name == "netmhcpan4-ba": predictor = mhctools.NetMHCpan4( - alleles=alleles, program_name="netMHCpan-4.0") + alleles=alleles, + program_name="netMHCpan-4.0", + mode="binding_affinity") + elif predictor_name == "netmhcpan4-el": + predictor = mhctools.NetMHCpan4( + alleles=alleles, + program_name="netMHCpan-4.0", + mode="elution_score") else: raise ValueError("Unsupported", predictor_name) -- GitLab