diff --git a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh deleted file mode 100755 index 53125eb7bec329ecbbd0d230b8afe809c1064204..0000000000000000000000000000000000000000 --- a/downloads-generation/data_evaluation/GENERATE.WITH_HPC_CLUSTER.sh +++ /dev/null @@ -1 +0,0 @@ -bash GENERATE.sh cluster diff --git a/downloads-generation/data_evaluation/GENERATE.sh b/downloads-generation/data_evaluation/GENERATE.sh index 2d904d9a7d8cc54e5bec5ab4df3e44f5c1ebfb88..9a8c336e510c1fa1453a64812e6f1c17d5b89397 100755 --- a/downloads-generation/data_evaluation/GENERATE.sh +++ b/downloads-generation/data_evaluation/GENERATE.sh @@ -96,8 +96,10 @@ else time mhcflurry-predict \ benchmark.monoallelic.csv.bz2 \ --allele-column hla \ - --prediction-column-prefix prediction.no_additional_ms \ + --prediction-column-prefix no_additional_ms_ \ --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.no_additional_ms" \ + --affinity-only \ + --no-affinity-percentile \ --out benchmark.monoallelic.predictions.csv \ --no-throw bzip2 -f benchmark.monoallelic.predictions.csv @@ -126,27 +128,45 @@ then echo "Reusing existing multiallelic predictions" else cp $SCRIPT_DIR/predict.py . - time python predict.py \ - benchmark.multiallelic.csv \ - --models \ - "$(mhcflurry-downloads path models_class1_pan)/models.combined" \ - "$(mhcflurry-downloads path models_class1_pan_variants)/models.*" \ - --out "$(pwd)/benchmark.multiallelic.predictions.csv" + time mhcflurry-predict \ + benchmark.multiallelic.csv.bz2 \ + --allele-column hla \ + --prediction-column-prefix mhcflurry_production_ \ + --models "$(mhcflurry-downloads path models_class1_pan)/models.combined" \ + --affinity-only \ + --no-affinity-percentile \ + --out "$(pwd)/benchmark.multiallelic.predictions1.csv" + + for variant in no_additional_ms compact_peptide affinity_only no_pretrain single_hidden_no_pretrain + do + time mhcflurry-predict \ + "$(pwd)/benchmark.multiallelic.predictions1.csv" \ + --allele-column hla \ + --prediction-column-prefix "${variant}_" \ + --models "$(mhcflurry-downloads path models_class1_pan_variants)/models.$variant" \ + --affinity-only \ + --no-affinity-percentile \ + --out "$(pwd)/benchmark.multiallelic.predictions1.csv" + done + bzip2 -f benchmark.multiallelic.predictions1.csv rm -f benchmark.multiallelic.predictions2.csv.bz2 fi + ### PRESENTATION: WITH FLANKS if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions2.csv.bz2" ] then echo "Reusing existing multiallelic predictions2" else - mhcflurry-predict-presentation \ - "$(pwd)/benchmark.multiallelic.predictions1.csv" \ - --out "$(pwd)/benchmark.multiallelic.predictions2.csv" \ + time mhcflurry-predict \ + "$(pwd)/benchmark.multiallelic.predictions1.csv.bz2" \ + --allele-column hla \ + --prediction-column-prefix presentation_with_flanks_ \ --models "$(mhcflurry-downloads path models_class1_presentation)/models" \ - --include-details \ - --prediction-col presentation_with_flanks \ + --no-affinity-percentile \ + --out "$(pwd)/benchmark.multiallelic.predictions2.csv" + bzip2 -f benchmark.multiallelic.predictions2.csv rm -f benchmark.multiallelic.predictions3.csv.bz2 fi @@ -156,18 +176,19 @@ if [ "$2" == "continue-incomplete" ] && [ -f "benchmark.multiallelic.predictions then echo "Reusing existing multiallelic predictions3" else - mhcflurry-predict-presentation \ - "$(pwd)/benchmark.multiallelic.predictions2.csv" \ - --out "$(pwd)/benchmark.multiallelic.predictions3.csv" \ + time mhcflurry-predict \ + "$(pwd)/benchmark.multiallelic.predictions2.csv.bz2" \ + --allele-column hla \ + --prediction-column-prefix presentation_with_flanks_ \ --models "$(mhcflurry-downloads path models_class1_presentation)/models" \ - --include-details \ - --prediction-col presentation_without_flanks \ - --no-flanks + --no-affinity-percentile \ + --no-flanking \ + --out "$(pwd)/benchmark.multiallelic.predictions3.csv" + bzip2 -f benchmark.multiallelic.predictions3.csv fi - cp $SCRIPT_ABSOLUTE_PATH . bzip2 -f "$LOG" for i in $(ls LOG-worker.*.txt) ; do bzip2 -f $i ; done diff --git a/downloads-generation/data_evaluation/predict.py b/downloads-generation/data_evaluation/predict.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000