diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh index 474768ea9ad7d93187bddb8fb37b012393f63bf6..3328779eeddae01f2764315ea3ada9312e0df301 100755 --- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh +++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh @@ -96,8 +96,7 @@ else fi # Write out and process peptides. -#for subset in all -for subset in chr1 +for subset in all do if [ "$2" == "reuse-all" ] then @@ -126,7 +125,7 @@ do fi python run_predictors.py \ - proteome_peptides.$subset.csv.bz2 \ + "$(pwd)/proteome_peptides.$subset.csv.bz2" \ --result-dtype "float16" \ --predictor mixmhcpred \ --chunk-size 500000 \ @@ -134,7 +133,6 @@ do --out "$OUT_DIR" \ --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.nogpu.lsf \ - --max-peptides 1000 \ --reuse-predictions "$REUSE" $EXTRA_ARGS # Run netmhcpan4 @@ -148,7 +146,7 @@ do fi python run_predictors.py \ - proteome_peptides.$subset.csv.bz2 \ + "$(pwd)/proteome_peptides.$subset.csv.bz2" \ --result-dtype "float16" \ --predictor netmhcpan4-$kind \ --chunk-size 1000 \ @@ -171,7 +169,7 @@ do fi python run_predictors.py \ - proteome_peptides.${subset}.csv.bz2 \ + "$(pwd)/proteome_peptides.$subset.csv.bz2" \ --result-dtype "float16" \ --predictor mhcflurry \ --chunk-size 500000 \ diff --git a/downloads-generation/data_mass_spec_benchmark/run_predictors.py b/downloads-generation/data_mass_spec_benchmark/run_predictors.py index 5bdd02db8a4d2d3486f613e5b9dafb6c2bc6f820..d0e5637967883baac7b9d03460c565bfc4c90fa2 100644 --- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py +++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py @@ -89,8 +89,8 @@ add_cluster_parallelism_args(parser) PREDICTOR_TO_COLS = { "mhcflurry": ["affinity"], "netmhcpan4-ba": ["affinity", "percentile_rank"], - "netmhcpan4-el": ["elution_score"], - "mixmhcpred": ["elution_score"], + "netmhcpan4-el": ["score"], + "mixmhcpred": ["score"], } @@ -405,7 +405,7 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None): # to execute. empty_results = pandas.Series(index=peptides, dtype=numpy.float16) - empty_results[:] = numpy.inf + empty_results[:] = float('-inf') try: predictor.predict_peptides_dataframe(["PEPTIDESS"]) mixmhcpred_usable_alleles.append(allele) @@ -416,9 +416,8 @@ def do_predictions_mhctools(work_item_dicts, constant_data=None): print("MixMHCpred usable alleles: ", *mixmhcpred_usable_alleles) print("MixMHCpred unusable alleles: ", *unusable_alleles) - predictor = mhctools.MixMHCpred(alleles=alleles) + predictor = mhctools.MixMHCpred(alleles=mixmhcpred_usable_alleles) assert mixmhcpred_usable_alleles, mixmhcpred_usable_alleles - alleles = mixmhcpred_usable_alleles else: raise ValueError("Unsupported", predictor_name) diff --git a/downloads-generation/models_class1_pan_variants/GENERATE.sh b/downloads-generation/models_class1_pan_variants/GENERATE.sh index 585fd8197d26eb5cc32b2bcb98c0fb74db0a1512..76737300a498b4b9fd6f0d93fecc2a7fa3b234dd 100755 --- a/downloads-generation/models_class1_pan_variants/GENERATE.sh +++ b/downloads-generation/models_class1_pan_variants/GENERATE.sh @@ -68,7 +68,7 @@ then python generate_hyperparameters.py hyperparameters.production.yaml compact_peptide > hyperparameters.compact_peptide.yaml fi -for kind in 34mer_sequence single_hidden_no_pretrain no_pretrain compact_peptide no_additional_ms ms_only affinity_only +for kind in no_additional_ms ms_only no_pretrain compact_peptide 34mer_sequence single_hidden_no_pretrain affinity_only do CONTINUE_INCOMPLETE_ARGS="" if [ "$2" == "continue-incomplete" ] && [ -d "models.unselected.${kind}" ] @@ -116,7 +116,7 @@ done echo "Done training. Beginning model selection." -for kind in single_hidden_no_pretrain no_pretrain 34mer_sequence compact_peptide no_additional_ms ms_only affinity_only +for kind in no_additional_ms ms_only no_pretrain compact_peptide 34mer_sequence single_hidden_no_pretrain affinity_only do MODELS_DIR="models.unselected.${kind}" mhcflurry-class1-select-pan-allele-models \ diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml index bc8accf0054b2f6df1b33536cbdc30d25d22fbb3..f3d0160fc1cd594ebf112b0a2bcb98dd1daf6169 100644 --- a/mhcflurry/downloads.yml +++ b/mhcflurry/downloads.yml @@ -21,11 +21,11 @@ releases: compatibility-version: 2 downloads: - name: models_class1_pan - url: https://github.com/openvax/mhcflurry/releases/download/1.6.0/models_class1_pan.selected.20200102.tar.bz2 + url: https://github.com/openvax/mhcflurry/releases/download/1.6.0/models_class1_pan.selected.20200104.tar.bz2 default: false - name: models_class1_pan_unselected - url: https://github.com/openvax/mhcflurry/releases/download/1.6.0/models_class1_pan.20200102.tar.bz2 + url: https://github.com/openvax/mhcflurry/releases/download/1.6.0/models_class1_pan.20200104.tar.bz2 default: false - name: models_class1_pan_refined