diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
index a3fa7887acab3b663d62505a709bb19014d8a000..fde917c4340f66e4db56e0367dd9f6412fb78f09 100755
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
@@ -62,6 +62,7 @@ for kind in with_mass_spec no_mass_spec
 do
     python run_mhcflurry.py \
         proteome_peptides.csv.bz2 \
+        --chunk-size 10000000 \
         --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
         --allele $(cat alleles.txt) \
         --out "predictions/mhcflurry.$kind" \
diff --git a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
index e2421d8259fa0cf5f6655e4ede46aa334fa7d076..a2602e361865abf0b41d3f730a849d524d72878b 100644
--- a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
+++ b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
@@ -137,7 +137,10 @@ def run(argv=sys.argv[1:]):
 
     num_chunks = int(math.ceil(len(peptides) / args.chunk_size))
     print("Split peptides into %d chunks" % num_chunks)
-    peptide_chunks = numpy.array_split(peptides, num_chunks)
+    peptide_chunks = [
+        EncodableSequences.create(chunk)
+        for chunk in numpy.array_split(peptides, num_chunks)
+    ]
 
     GLOBAL_DATA["predictor"] = predictor
     GLOBAL_DATA["args"] = {
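
A minimal sketch of the pattern these two hunks introduce: GENERATE.sh now passes --chunk-size 10000000, and run_mhcflurry.py splits the peptide list into chunks of at most that size, pre-encoding each chunk as an EncodableSequences object up front instead of passing raw peptide strings to every prediction call. The helper name make_encoded_chunks and the usage comments below are illustrative assumptions, not code from the diff.

import math

import numpy
from mhcflurry.encodable_sequences import EncodableSequences


def make_encoded_chunks(peptides, chunk_size=10000000):
    """Split peptides into ceil(len/chunk_size) chunks, each pre-encoded."""
    num_chunks = int(math.ceil(len(peptides) / chunk_size))
    print("Split peptides into %d chunks" % num_chunks)
    # numpy.array_split tolerates uneven splits (the last chunk may be shorter).
    # Each EncodableSequences object caches its encoded representation after
    # first use, so repeated predictions over the same chunk avoid re-encoding.
    return [
        EncodableSequences.create(chunk)
        for chunk in numpy.array_split(peptides, num_chunks)
    ]


# Hypothetical usage (predictor loading and allele name are illustrative):
# predictor = Class1AffinityPredictor.load(models_dir)
# for chunk in make_encoded_chunks(peptides):
#     predictions = predictor.predict(peptides=chunk, allele="HLA-A*02:01")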