diff --git a/mhcflurry/class1_allele_specific/cross_validation.py b/mhcflurry/class1_allele_specific/cross_validation.py
index 6bcd092cbe2b4c4fe11e4bff273c6c3e8d04a909..5ceeb4b729e6f13c8dd9fa2ff4d5e12839ace6e1 100644
--- a/mhcflurry/class1_allele_specific/cross_validation.py
+++ b/mhcflurry/class1_allele_specific/cross_validation.py
@@ -126,9 +126,9 @@ def cross_validation_folds(
     impute_kwargs : dict, optional
         Additional kwargs to pass to mhcflurry.Dataset.impute_missing_values.
 
-    n_jobs : integer, optional
-        The number of jobs to run in parallel. If -1, then the number of jobs
-        is set to the number of cores.
+    parallel_backend : mhcflurry.parallelism.ParallelBackend, optional
+        Futures implementation to use for running on multiple threads,
+        processes, or nodes
 
     Returns
     -----------
diff --git a/mhcflurry/class1_allele_specific/train.py b/mhcflurry/class1_allele_specific/train.py
index c4f29e4936adb83d235c5bdba1683a8c181f34a2..7e5824b88163e60d22a846c32adb6b0582297457 100644
--- a/mhcflurry/class1_allele_specific/train.py
+++ b/mhcflurry/class1_allele_specific/train.py
@@ -259,7 +259,9 @@ def train_across_models_and_folds(
     return_predictors : boolean, optional
         Include the trained predictors in the result.
 
-    parallel_backend : parallel backend, optional
+    parallel_backend : mhcflurry.parallelism.ParallelBackend, optional
+        Futures implementation to use for running on multiple threads,
+        processes, or nodes
 
     Returns
     -----------
diff --git a/mhcflurry/parallelism.py b/mhcflurry/parallelism.py
index 9bce9c015a5115aeb1cc1b2950f20752f3d8559b..18008b4e9057c3b4e0c3b9c6d1a2db81522beb5f 100644
--- a/mhcflurry/parallelism.py
+++ b/mhcflurry/parallelism.py
@@ -5,6 +5,10 @@ DEFAULT_BACKEND = None
 
 
 class ParallelBackend(object):
+    """
+    Thin wrapper of futures implementations. Designed to support
+    concurrent.futures as well as dask.distributed's workalike implementation.
+    """
     def __init__(self, executor, module, verbose=1):
         self.executor = executor
         self.module = module
@@ -33,6 +37,9 @@ class ParallelBackend(object):
 
 
 class DaskDistributedParallelBackend(ParallelBackend):
+    """
+    ParallelBackend that uses dask.distributed
+    """
     def __init__(self, scheduler_ip_and_port, verbose=1):
         from dask import distributed  # pylint: disable=import-error
         executor = distributed.Executor(scheduler_ip_and_port)
@@ -46,6 +53,10 @@ class DaskDistributedParallelBackend(ParallelBackend):
 
 
 class ConcurrentFuturesParallelBackend(ParallelBackend):
+    """
+    ParallelBackend that uses Python's concurrent.futures module.
+    Can use either threads or processes.
+    """
     def __init__(self, num_workers=1, processes=False, verbose=1):
         if processes:
             executor = futures.ProcessPoolExecutor(num_workers)
diff --git a/mhcflurry/predict.py b/mhcflurry/predict.py
index 8b886b4e3546d6285bd210efadbcc57bb19a558a..6534971ef67c20c15c0645800c6ab8af828f6ccb 100644
--- a/mhcflurry/predict.py
+++ b/mhcflurry/predict.py
@@ -22,6 +22,9 @@ from .common import normalize_allele_name, UnsupportedAllele
 
 def predict(alleles, peptides, loaders=None):
     """
+    Make predictions across all combinations of the specified alleles and
+    peptides.
+
     Parameters
     ----------
     alleles : list of str
@@ -30,6 +33,9 @@ def predict(alleles, peptides, loaders=None):
     peptides : list of str
         Peptide amino acid sequences.
 
+    loaders : list of Class1AlleleSpecificPredictorLoader, optional
+        Loaders to try. Will be tried in the order given.
+
     Returns DataFrame with columns "Allele", "Peptide", and "Prediction"
     """
     if loaders is None:
@@ -64,4 +70,3 @@ def predict(alleles, peptides, loaders=None):
         result_dict["Peptide"].append(peptides[i])
         result_dict["Prediction"].append(ic50)
     return pd.DataFrame(result_dict)
-