From 2ca1b5eecc7e4cb49e1c110e5d29e5a89a911fea Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Tue, 1 Oct 2019 09:37:47 -0400
Subject: [PATCH] Wrap peptide chunks in EncodableSequences; pass --chunk-size in GENERATE.sh

---
 downloads-generation/data_mass_spec_benchmark/GENERATE.sh    | 1 +
 .../data_mass_spec_benchmark/run_mhcflurry.py                | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
index a3fa7887..fde917c4 100755
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh
@@ -62,6 +62,7 @@ for kind in with_mass_spec no_mass_spec
 do
     python run_mhcflurry.py \
         proteome_peptides.csv.bz2 \
+        --chunk-size 10000000 \
         --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
         --allele $(cat alleles.txt) \
         --out "predictions/mhcflurry.$kind" \
diff --git a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
index e2421d82..a2602e36 100644
--- a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
+++ b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py
@@ -137,7 +137,10 @@ def run(argv=sys.argv[1:]):
 
     num_chunks = int(math.ceil(len(peptides) / args.chunk_size))
     print("Split peptides into %d chunks" % num_chunks)
-    peptide_chunks = numpy.array_split(peptides, num_chunks)
+    peptide_chunks = [
+        EncodableSequences.create(chunk)
+        for chunk in numpy.array_split(peptides, num_chunks)
+    ]
 
     GLOBAL_DATA["predictor"] = predictor
     GLOBAL_DATA["args"] = {
-- 
GitLab