diff --git a/downloads-generation/models_class1_pan_refined/GENERATE.sh b/downloads-generation/models_class1_pan_refined/GENERATE.sh
index 17d63e6fbb5175efcf19faa198fbe2f277d2e64d..a0cf2a4b7ba958a0c9cbb20b216561e3665d8513 100755
--- a/downloads-generation/models_class1_pan_refined/GENERATE.sh
+++ b/downloads-generation/models_class1_pan_refined/GENERATE.sh
@@ -98,7 +98,6 @@ else
         --expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \
         --decoys-per-hit 1 \
         --out train.multiallelic.csv \
-        --alleles "HLA-A*02:01" "HLA-B*27:01" "HLA-C*07:01" "HLA-A*03:01" "HLA-B*15:01" "HLA-C*01:02"
 fi
 
 ALLELE_LIST=$(bzcat "$MONOALLELIC_TRAIN" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq)
@@ -113,6 +112,7 @@ time mhcflurry-multiallelic-refinement \
     --out-affinity-predictor-dir $(pwd)/models.affinity \
     --out-presentation-predictor-dir $(pwd)/models.presentation \
     --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
+    --only-alleles-with-mass-spec \
     $PARALLELISM_ARGS
 
 time mhcflurry-calibrate-percentile-ranks \
diff --git a/mhcflurry/multiallelic_refinement_command.py b/mhcflurry/multiallelic_refinement_command.py
index ac302861f67e30075375763068b4ad58a8566830..63371e58d18929cd768c4595b1dbab5d24c5ac90 100644
--- a/mhcflurry/multiallelic_refinement_command.py
+++ b/mhcflurry/multiallelic_refinement_command.py
@@ -72,6 +72,10 @@ parser.add_argument(
     "--max-models",
     type=int,
     default=None)
+parser.add_argument(
+    "--only-alleles-with-mass-spec",
+    action="store_true",
+    help="Restrict monoallelic data to alleles in multiallelic data")
 parser.add_argument(
     "--verbosity",
     type=int,
@@ -106,6 +110,23 @@ def run(argv=sys.argv[1:]):
     monoallelic_df = pandas.read_csv(args.monoallelic_data)
     print("Loaded monoallelic data: %s" % (str(monoallelic_df.shape)))
 
+    if args.only_alleles_with_mass_spec:
+        multiallelic_alleles = set()
+        for hla in multiallelic_df.hla.unique():
+            multiallelic_alleles.update(hla.split())
+        print(
+            "Multiallelic alleles (%d)" % len(multiallelic_alleles),
+            multiallelic_alleles)
+        new_monoallelic_df = monoallelic_df.loc[
+            monoallelic_df.allele.isin((multiallelic_alleles))
+        ].copy()
+        print(
+            "Allele selection reduced monoallelic data from",
+            len(monoallelic_df),
+            "to",
+            len(new_monoallelic_df))
+        monoallelic_df = new_monoallelic_df
+
     input_predictor = Class1AffinityPredictor.load(
         args.models_dir, optimization_level=0, max_models=args.max_models)
     print("Loaded: %s" % input_predictor)