diff --git a/mhcflurry/train_allele_specific_models_command.py b/mhcflurry/train_allele_specific_models_command.py index c9b018b210ba4aa436e8b192a75f8878dadbf37a..cd5505c133a8e95007d225cd72e3ab227163f10d 100644 --- a/mhcflurry/train_allele_specific_models_command.py +++ b/mhcflurry/train_allele_specific_models_command.py @@ -7,6 +7,7 @@ import signal import sys import time import traceback +import random from multiprocessing import Pool, Queue, cpu_count from functools import partial from pprint import pprint @@ -272,6 +273,10 @@ def run(argv=sys.argv[1:]): if worker_pool: print("Processing %d work items in parallel." % len(work_items)) + # The estimated time to completion is more accurate if we randomize + # the order of the work. + random.shuffle(work_items) + # We sort here so the predictors are in order of hyperparameter set num. # This is convenient so that the neural networks get merged for each # allele in the same order.