Skip to content
Snippets Groups Projects
Commit 4dfbc39b authored by Tim O'Donnell
Browse files

Simplify data_evaluation - no more monoallelic

parent f825e927
No related branches found
No related tags found
No related merge requests found
......@@ -55,29 +55,29 @@ else
fi
### GENERATE BENCHMARK: MONOALLELIC
# Build one monoallelic benchmark file per "kind":
#   train_excluded - passes the models.no_additional_ms train_data.csv.bz2
#                    path to make_benchmark.py via --exclude-train-data
#   all            - passes an empty --exclude-train-data value
# If the script's second positional argument is "continue-incomplete" and the
# compressed output for a kind already exists, that file is reused instead of
# being regenerated.
for kind in train_excluded all
do
    EXCLUDE_TRAIN_DATA=""
    if [ "$kind" == "train_excluded" ]
    then
        EXCLUDE_TRAIN_DATA="$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms/train_data.csv.bz2"
    fi

    if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.$kind.csv.bz2" ]
    then
        echo "Reusing existing monoallelic benchmark: benchmark.monoallelic.$kind.csv.bz2"
    else
        # Quoted so that a SCRIPT_DIR containing whitespace or glob characters
        # reaches cp as a single argument.
        cp "$SCRIPT_DIR/make_benchmark.py" .
        time python make_benchmark.py \
            --hits "$(pwd)/hits_with_tpm.csv.bz2" \
            --proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
            --decoys-per-hit 110 \
            --exclude-train-data "$EXCLUDE_TRAIN_DATA" \
            --only-format MONOALLELIC \
            --out "$(pwd)/benchmark.monoallelic.$kind.csv"
        # Compress the freshly written CSV; -f replaces any existing .bz2.
        bzip2 -f "benchmark.monoallelic.$kind.csv"
    fi
done
#for kind in train_excluded all
#do
# EXCLUDE_TRAIN_DATA=""
# if [ "$kind" == "train_excluded" ]
# then
# EXCLUDE_TRAIN_DATA="$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms/train_data.csv.bz2"
# fi
#
# if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.$kind.csv.bz2" ]
# then
# echo "Reusing existing monoallelic benchmark: benchmark.monoallelic.$kind.csv.bz2"
# else
# cp $SCRIPT_DIR/make_benchmark.py .
# time python make_benchmark.py \
# --hits "$(pwd)/hits_with_tpm.csv.bz2" \
# --proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
# --decoys-per-hit 110 \
# --exclude-train-data "$EXCLUDE_TRAIN_DATA" \
# --only-format MONOALLELIC \
# --out "$(pwd)/benchmark.monoallelic.$kind.csv"
# bzip2 -f benchmark.monoallelic.$kind.csv
# fi
#done
### GENERATE BENCHMARK: MULTIALLELIC
for kind in train_excluded all
......@@ -110,21 +110,21 @@ mkdir commands
for kind in train_excluded all
do
### AFFINITY PREDICTOR VARIANT: MONOALLELIC
# Queue (do not run) the no_additional_ms affinity predictions for the
# monoallelic benchmark of the current $kind. When the script's second
# argument is "continue-incomplete" and the compressed predictions file
# already exists, nothing is queued.
if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.no_additional_ms.$kind.csv.bz2" ]
then
echo "Reusing existing monoallelic benchmark predictions"
else
# The command line is only echoed: it is appended as text to
# commands/monoallelic.$kind.sh. The escaped quotes (\") around the --models
# path are written literally into that file.
echo time mhcflurry-predict \
"$(pwd)/benchmark.monoallelic.$kind.csv.bz2" \
--allele-column hla \
--prediction-column-prefix no_additional_ms_ \
--models \""$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms"\" \
--affinity-only \
--no-affinity-percentile \
--out "$(pwd)/benchmark.monoallelic.no_additional_ms.$kind.csv" \
--no-throw >> commands/monoallelic.$kind.sh
# Second queued line: compress the predictions CSV named by --out above.
echo bzip2 -f "$(pwd)/benchmark.monoallelic.no_additional_ms.$kind.csv" >> commands/monoallelic.$kind.sh
fi
#if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.no_additional_ms.$kind.csv.bz2" ]
#then
# echo "Reusing existing monoallelic benchmark predictions"
#else
# echo time mhcflurry-predict \
# "$(pwd)/benchmark.monoallelic.$kind.csv.bz2" \
# --allele-column hla \
# --prediction-column-prefix no_additional_ms_ \
# --models \""$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms"\" \
# --affinity-only \
# --no-affinity-percentile \
# --out "$(pwd)/benchmark.monoallelic.no_additional_ms.$kind.csv" \
# --no-throw >> commands/monoallelic.$kind.sh
# echo bzip2 -f "$(pwd)/benchmark.monoallelic.no_additional_ms.$kind.csv" >> commands/monoallelic.$kind.sh
#fi
### AFFINITY PREDICTORS: MULTIALLELIC
......@@ -143,7 +143,8 @@ do
echo bzip2 -f "$(pwd)/benchmark.multiallelic.production.$kind.csv" >> commands/multiallelic.production.$kind.sh
fi
for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain 500nm
#for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain 500nm
for variant in 50nm
do
if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.${variant}.$kind.csv.bz2" ]
then
......@@ -195,17 +196,17 @@ do
### PRECOMPUTED ####
for variant in netmhcpan4.ba netmhcpan4.el mixmhcpred
do
# Queue (do not run) the join of the monoallelic benchmark for the current
# $kind with the precomputed predictions of the current $variant. When the
# script's second argument is "continue-incomplete" and the compressed result
# already exists, nothing is queued.
if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.${variant}.$kind.csv.bz2" ]
then
    echo "Reusing existing monoallelic ${variant}"
else
    # Quoted so that a SCRIPT_DIR containing whitespace or glob characters
    # reaches cp as a single argument.
    cp "$SCRIPT_DIR/join_with_precomputed.py" .
    # The commands are only echoed: they are appended as text to the per-variant
    # command file. The escaped quotes (\") around the input path are written
    # literally into that file.
    echo time python join_with_precomputed.py \
        \""$(pwd)/benchmark.monoallelic.$kind.csv.bz2"\" \
        "${variant}" \
        --out "$(pwd)/benchmark.monoallelic.${variant}.$kind.csv" >> "commands/monoallelic.${variant}.$kind.sh"
    echo bzip2 -f "$(pwd)/benchmark.monoallelic.${variant}.$kind.csv" >> "commands/monoallelic.${variant}.$kind.sh"
fi
#if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.monoallelic.${variant}.$kind.csv.bz2" ]
#then
# echo "Reusing existing monoallelic ${variant}"
#else
# cp $SCRIPT_DIR/join_with_precomputed.py .
# echo time python join_with_precomputed.py \
# \""$(pwd)/benchmark.monoallelic.$kind.csv.bz2"\" \
# ${variant} \
# --out "$(pwd)/benchmark.monoallelic.${variant}.$kind.csv" >> commands/monoallelic.${variant}.$kind.sh
# echo bzip2 -f "$(pwd)/benchmark.monoallelic.${variant}.$kind.csv" >> commands/monoallelic.${variant}.$kind.sh
#fi
if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.${variant}.$kind.csv.bz2" ]
then
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment