Skip to content
Snippets Groups Projects
Commit fb4bb8a2 authored by Tim O'Donnell
Browse files

Add mixmhcpred to mass spec benchmark

parent 3136d2f1
No related branches found
No related tags found
No related merge requests found
...@@ -96,9 +96,7 @@ else ...@@ -96,9 +96,7 @@ else
fi fi
# Write out and process peptides. # Write out and process peptides.
# First just chr1 peptides, then all peptides. for subset in all
# TODO: switch this back
for subset in chr1 all
do do
if [ "$2" == "reuse-all" ] if [ "$2" == "reuse-all" ]
then then
...@@ -118,19 +116,33 @@ do ...@@ -118,19 +116,33 @@ do
bzip2 proteome_peptides.$subset.csv bzip2 proteome_peptides.$subset.csv
fi fi
# Run mixmhcpred
OUT_DIR=predictions/${subset}.mixmhcpred
REUSE=""
if [ "${2:-reuse-none}" != "reuse-none" ]
then
REUSE="$EXISTING_DATA"/$OUT_DIR
fi
python run_predictors.py \
proteome_peptides.$subset.csv.bz2 \
--result-dtype "float16" \
--predictor mixmhcpred \
--chunk-size 500000 \
--allele $(cat alleles.txt) \
--out "$OUT_DIR" \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
--reuse-predictions "$REUSE" $EXTRA_ARGS
# Run netmhcpan4 # Run netmhcpan4
for kind in el ba for kind in el ba
do do
OUT_DIR=predictions/${subset}.netmhcpan4.$kind OUT_DIR=predictions/${subset}.netmhcpan4.$kind
REUSE1="" REUSE=""
REUSE2=""
if [ "$subset" == "all" ]
then
REUSE1="predictions/chr1.netmhcpan4.$kind"
fi
if [ "${2:-reuse-none}" != "reuse-none" ] if [ "${2:-reuse-none}" != "reuse-none" ]
then then
REUSE2="$EXISTING_DATA"/$OUT_DIR REUSE="$EXISTING_DATA"/$OUT_DIR
fi fi
python run_predictors.py \ python run_predictors.py \
...@@ -138,11 +150,11 @@ do ...@@ -138,11 +150,11 @@ do
--result-dtype "float16" \ --result-dtype "float16" \
--predictor netmhcpan4-$kind \ --predictor netmhcpan4-$kind \
--chunk-size 1000 \ --chunk-size 1000 \
--allele $(cat alleles.txt | grep -v '31:0102') \ --allele $(cat alleles.txt) \
--out "$OUT_DIR" \ --out "$OUT_DIR" \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \ --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \
--reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS --reuse-predictions "$REUSE" $EXTRA_ARGS
done done
...@@ -150,15 +162,10 @@ do ...@@ -150,15 +162,10 @@ do
for kind in combined for kind in combined
do do
OUT_DIR=predictions/${subset}.mhcflurry.${kind} OUT_DIR=predictions/${subset}.mhcflurry.${kind}
REUSE1="" REUSE=""
REUSE2=""
if [ "$subset" == "all" ]
then
REUSE1="predictions/chr1.mhcflurry.${kind}"
fi
if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ] if [ "${2:-reuse-none}" != "reuse-none" ] && [ "${2:-reuse-none}" != "reuse-predictions-except-mhcflurry" ]
then then
REUSE2="$EXISTING_DATA"/$OUT_DIR REUSE="$EXISTING_DATA"/$OUT_DIR
fi fi
python run_predictors.py \ python run_predictors.py \
...@@ -172,7 +179,7 @@ do ...@@ -172,7 +179,7 @@ do
--out "$OUT_DIR" \ --out "$OUT_DIR" \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.gpu.lsf \ --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.gpu.lsf \
--reuse-predictions "$REUSE1" "$REUSE2" $EXTRA_ARGS --reuse-predictions "$REUSE" $EXTRA_ARGS
done done
done done
......
...@@ -43,7 +43,7 @@ parser.add_argument( ...@@ -43,7 +43,7 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--predictor", "--predictor",
required=True, required=True,
choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el")) choices=("mhcflurry", "netmhcpan4-ba", "netmhcpan4-el", "mixmhcpred"))
parser.add_argument( parser.add_argument(
"--mhcflurry-models-dir", "--mhcflurry-models-dir",
metavar="DIR", metavar="DIR",
...@@ -90,6 +90,7 @@ PREDICTOR_TO_COLS = { ...@@ -90,6 +90,7 @@ PREDICTOR_TO_COLS = {
"mhcflurry": ["affinity"], "mhcflurry": ["affinity"],
"netmhcpan4-ba": ["affinity", "percentile_rank"], "netmhcpan4-ba": ["affinity", "percentile_rank"],
"netmhcpan4-el": ["elution_score"], "netmhcpan4-el": ["elution_score"],
"mixmhcpred": ["elution_score"],
} }
...@@ -392,6 +393,11 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None): ...@@ -392,6 +393,11 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None):
alleles=alleles, alleles=alleles,
program_name="netMHCpan-4.0", program_name="netMHCpan-4.0",
mode="elution_score") mode="elution_score")
elif predictor_name == "mixmhcpred":
predictor = mhctools.MixMHCpred(
alleles=alleles,
program_name="netMHCpan-4.0",
mode="elution_score")
else: else:
raise ValueError("Unsupported", predictor_name) raise ValueError("Unsupported", predictor_name)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment