diff --git a/mhcflurry/train_pan_allele_models_command.py b/mhcflurry/train_pan_allele_models_command.py
index ca48807684cba5773de2d73bf74aa2f6c8af72db..56e4d60b7cbb9f3d22c96a85413ce01d643e7563 100644
--- a/mhcflurry/train_pan_allele_models_command.py
+++ b/mhcflurry/train_pan_allele_models_command.py
@@ -32,8 +32,6 @@ from .allele_encoding import AlleleEncoding
 from .encodable_sequences import EncodableSequences
 from .regression_target import to_ic50, from_ic50
 
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
-
 
 # To avoid pickling large matrices to send to child processes when running in
 # parallel, we use this global variable as a place to store data. Data that is
diff --git a/test/test_train_and_related_commands.py b/test/test_train_and_related_commands.py
index 197ade0d28463f9dffc539dcf2705a3a17366a84..f6abfd2630108274beb8a2529a94a3bb98fb21fb 100644
--- a/test/test_train_and_related_commands.py
+++ b/test/test_train_and_related_commands.py
@@ -14,6 +14,8 @@ from numpy.testing import assert_array_less, assert_equal
 from mhcflurry import Class1AffinityPredictor
 from mhcflurry.downloads import get_path
 
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
 HYPERPARAMETERS = [
     {
         "n_models": 2,
diff --git a/test/test_train_pan_allele_models_command.py b/test/test_train_pan_allele_models_command.py
index 870c3694fb0a9f0c45b1c3f4f2fb45b0428be0a5..25a9bdc7ed81c87c5a83ba8dd86e0f5bb186cf5e 100644
--- a/test/test_train_pan_allele_models_command.py
+++ b/test/test_train_pan_allele_models_command.py
@@ -18,6 +18,8 @@ from mhcflurry import Class1AffinityPredictor,Class1NeuralNetwork
 from mhcflurry.allele_encoding import AlleleEncoding
 from mhcflurry.downloads import get_path
 
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
 
 HYPERPARAMETERS_LIST = [
 {