diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh
index 74de35464282d94610f2fc19190a05ada11aa4ae..d1fe18a9e936ae6ee6698e935832bc7c8c366dae 100755
--- a/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh
+++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh
@@ -35,9 +35,8 @@ REFERENCES_DIR=$(mhcflurry-downloads path data_references)
 python write_proteome_peptides.py \
     "$PEPTIDES" \
     "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \
-    --out proteome_peptides.csv
-ls -lh proteome_peptides.csv
-bzip2 proteome_peptides.csv
+    --chromosome 1 \
+    --out proteome_peptides.chr1.csv
 
 python write_allele_list.py "$PEPTIDES" --out alleles.txt
 
@@ -46,12 +45,12 @@ mkdir predictions
 for kind in with_mass_spec no_mass_spec
 do
     python run_mhcflurry.py \
-        proteome_peptides.csv.bz2 \
-        --chunk-size 1000000 \
+        proteome_peptides.chr1.csv \
+        --chunk-size 100000 \
         --batch-size 65536 \
         --models-dir "$(mhcflurry-downloads path models_class1_pan)/models.$kind" \
         --allele $(cat alleles.txt) \
-        --out "predictions/mhcflurry.$kind" \
+        --out "predictions/chr1.mhcflurry.$kind" \
         --verbosity 1 \
         --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
         --cluster-parallelism \
@@ -61,6 +60,8 @@ do
         --cluster-script-prefix-path $SCRIPT_DIR/cluster_submit_script_header.mssm_hpc.lsf
 done
 
+bzip2 proteome_peptides.chr1.csv  # compress only after the loop above, which passes the uncompressed CSV to run_mhcflurry.py
+
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 LOG.txt
 RESULT="$SCRATCH_DIR/${DOWNLOAD_NAME}.$(date +%Y%m%d).tar.bz2"
diff --git a/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py b/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py
index e18daf9ee5d2dd70779e1aed166344c2cc9aa543..29f9f728f58960846c42797f37bb3e7890bb26b5 100644
--- a/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py
+++ b/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py
@@ -27,6 +27,11 @@ parser.add_argument(
     "--out",
     metavar="OUT.csv",
     help="Out file path")
+parser.add_argument(
+    "--chromosome",
+    metavar="CHR",
+    nargs="+",  # one or more values, collected into a list
+    help="Use only proteins from the specified chromosome(s)")
 parser.add_argument(
     "--debug-max-rows",
     metavar="N",
@@ -62,6 +67,12 @@ def run():
     df = df.loc[~df.protein_ensembl_primary.isnull()]
     print("After: ", len(df))
 
+    if args.chromosome:  # optional filter; skipped when no chromosomes were requested
+        print("Subselecting to chromosome(s): ", *args.chromosome)
+        print("Before: ", len(df))  # row count before the chromosome filter
+        df = df.loc[df.protein_primary_ensembl_contig.isin(args.chromosome)]  # NOTE(review): isin() matches exact values and CLI values are strings; assumes the contig column holds strings -- confirm
+        print("After: ", len(df))  # row count after the chromosome filter
+
     (flanking_length,) = list(
         set(df.n_flank.str.len().unique()).union(
             set(df.n_flank.str.len().unique())))