Add MixMHCpred to the mass-spec benchmark predictions.

GENERATE.sh:
  * Process only the "all" peptide subset (the chr1 pass is removed).
  * Run the mixmhcpred predictor before netmhcpan4.
  * Replace the REUSE1/REUSE2 pair with a single REUSE directory.
  * Stop filtering '31:0102' out of the netmhcpan4 allele list.

run_predictors.py:
  * Accept "mixmhcpred" as a --predictor choice and register its result
    column in PREDICTOR_TO_COLS.
  * Build the predictor with mhctools.MixMHCpred(alleles=alleles); the
    netMHCpan-4.0 program name and mode arguments belong to the netMHCpan
    branches only.

diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
index 5710ee0cf057b924eafb894c860f96e49677f9f3..21d1863912e719beea54fbb7fcb51ff392971881 100755
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
@@ -96,9 +96,7 @@ else
 fi
 
 # Write out and process peptides.
-# First just chr1 peptides, then all peptides.
-# TODO: switch this back
-for subset in chr1 all
+for subset in all
 do
     if [ "$2" == "reuse-all" ]
     then
@@ -118,19 +116,33 @@ do
         bzip2 proteome_peptides.$subset.csv
     fi
 
+    # Run mixmhcpred
+    OUT_DIR=predictions/${subset}.mixmhcpred
+    REUSE=""
+    if [ "${2:-reuse-none}" != "reuse-none" ]
+    then
+        REUSE="$EXISTING_DATA"/$OUT_DIR
+    fi
+
+    python run_predictors.py \
+        proteome_peptides.$subset.csv.bz2 \
+        --result-dtype "float16" \
+        --predictor mixmhcpred \
+        --chunk-size 500000 \
+        --allele $(cat alleles.txt) \
+        --out "$OUT_DIR" \
+        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+        --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
+        --reuse-predictions "$REUSE" $EXTRA_ARGS
+
     # Run netmhcpan4
     for kind in el ba
     do
         OUT_DIR=predictions/${subset}.netmhcpan4.$kind
-        REUSE1=""
-        REUSE2=""
-        if [ "$subset" == "all" ]
-        then
-            REUSE1="predictions/chr1.netmhcpan4.$kind"
-        fi
+        REUSE=""
         if [ "${2:-reuse-none}" != "reuse-none" ]
         then
-            REUSE2="$EXISTING_DATA"/$OUT_DIR
+            REUSE="$EXISTING_DATA"/$OUT_DIR
         fi
 
         python run_predictors.py \
@@ -138,11 +150,11 @@ do
             --result-dtype "float16" \
             --predictor netmhcpan4-$kind \
             --chunk-size 1000 \
-            --allele $(cat alleles.txt | grep -v '31:0102') \
+            --allele $(cat alleles.txt) \
             --out "$OUT_DIR" \
             --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
             --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
-            --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
+            --reuse-predictions "$REUSE" $EXTRA_ARGS
     done
 
 
@@ -150,15 +162,10 @@ do
     for kind in combined
     do
         OUT_DIR=predictions/${subset}.mhcflurry.${kind}
-        REUSE1=""
-        REUSE2=""
-        if [ "$subset" == "all" ]
-        then
-            REUSE1="predictions/chr1.mhcflurry.${kind}"
-        fi
+        REUSE=""
         if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ]
         then
-            REUSE2="$EXISTING_DATA"/$OUT_DIR
+            REUSE="$EXISTING_DATA"/$OUT_DIR
         fi
 
         python run_predictors.py \
@@ -172,7 +179,7 @@ do
             --out "$OUT_DIR" \
             --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
             --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.gpu.lsf \
-            --reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS
+            --reuse-predictions "$REUSE" $EXTRA_ARGS
     done
 done
 
diff --git a/downloads-generation/data_mass_spec_benchmark/run_predictors.py b/downloads-generation/data_mass_spec_benchmark/run_predictors.py
index 482ed8ab58d0ce78ee059e7985a73248109bb58f..4b9345017f580f360797cf4c0811cfebbff3b94e 100644
--- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py
+++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py
@@ -43,7 +43,7 @@ parser.add_argument(
 parser.add_argument(
     "--predictor",
     required=True,
-    choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el"))
+    choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el", "mixmhcpred"))
 parser.add_argument(
     "--mhcflurry-models-dir",
     metavar="DIR",
@@ -90,6 +90,7 @@ PREDICTOR_TO_COLS = {
     "mhcflurry": ["affinity"],
     "netmhcpan4-ba": ["affinity", "percentile_rank"],
     "netmhcpan4-el": ["elution_score"],
+    "mixmhcpred": ["elution_score"],
 }
 
 
@@ -392,6 +393,10 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None):
             alleles=alleles,
             program_name="netMHCpan-4.0",
             mode="elution_score")
+    elif predictor_name == "mixmhcpred":
+        # Use the wrapper's own default executable; the netMHCpan-specific
+        # program_name/mode arguments do not apply to MixMHCpred.
+        predictor = mhctools.MixMHCpred(alleles=alleles)
     else:
         raise ValueError("Unsupported", predictor_name)
 