From 9d5d36b01d18f6c657b2d9b67a39d6161cd21427 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Fri, 10 Feb 2017 14:30:19 -0500
Subject: [PATCH] Comment tweaks

---
 .../models_class1_allele_specific_single/README.md        | 6 ++----
 mhcflurry/class1_allele_specific/cross_validation.py      | 3 +++
 test/test_class1_allele_specific_cv_and_train_command.py  | 3 +++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/downloads-generation/models_class1_allele_specific_single/README.md b/downloads-generation/models_class1_allele_specific_single/README.md
index 12046a00..0003e3cd 100644
--- a/downloads-generation/models_class1_allele_specific_single/README.md
+++ b/downloads-generation/models_class1_allele_specific_single/README.md
@@ -2,15 +2,13 @@
 This download contains trained MHC Class I allele-specific MHCflurry models. The training data used is in the [data_combined_iedb_kim2014](../data_combined_iedb_kim2014) MHCflurry download.

 We first select network hyperparameters for each allele individually using cross validation over the models enumerated in [models.py](models.py). The best hyperparameter settings are selected via average of AUC (at 500nm), F1, and Kendall's Tau over the training folds. We then train the production models over the full training set using the selected hyperparameters.

-The training script supports multi-node parallel execution using the [dask-distributed](https://distributed.readthedocs.io/en/latest/) library. To enable this, pass the IP and port of the dask scheduler to the training script with the '--dask-scheduler' option. The GENERATE.sh script passes all arguments to the training script so you can just give it as an argument to GENERATE.sh.
+The training script supports multi-node parallel execution using the [kubeface](https://github.com/hammerlab/kubeface) library.

-We run dask distributed on Google Container Engine using Kubernetes as described [here](https://github.com/hammerlab/dask-distributed-on-kubernetes).
+To use kubeface, create a Google Storage bucket and pass it with the --storage-prefix argument in the command below.

 To generate this download we run:
 ```
-# If you are running dask distributed using our kubernetes config, you can use the DASK_IP one liner below.
-# Otherwise, just set it to the IP of the dask scheduler.
 ./GENERATE.sh \
     --cv-folds-per-task 10 \
     --backend kubernetes \
diff --git a/mhcflurry/class1_allele_specific/cross_validation.py b/mhcflurry/class1_allele_specific/cross_validation.py
index 184bae58..1bf0038f 100644
--- a/mhcflurry/class1_allele_specific/cross_validation.py
+++ b/mhcflurry/class1_allele_specific/cross_validation.py
@@ -187,6 +187,9 @@ def cross_validation_folds(
         lambda kwargs: impute_and_select_allele(**kwargs),
         imputation_args)

+    # Here _replace is a method on named tuples that returns a new named
+    # tuple with the specified key set to the given value and all other
+    # key/values the same as the original.
     return [
         result_fold._replace(imputed_train=imputation_result)
         for (result_fold, imputation_result) in zip(
diff --git a/test/test_class1_allele_specific_cv_and_train_command.py b/test/test_class1_allele_specific_cv_and_train_command.py
index 99d1a991..bcc71db1 100644
--- a/test/test_class1_allele_specific_cv_and_train_command.py
+++ b/test/test_class1_allele_specific_cv_and_train_command.py
@@ -69,6 +69,9 @@ def test_small_run():
         "--num-local-threads", "1",
     ]
     if KUBEFACE_INSTALLED:
+        # If kubeface is installed, this command will use it by default.
+        # In that case, we want the kubeface storage written to a local
+        # path rather than assuming a Google Storage bucket exists.
         args.extend(["--storage-prefix", "/tmp/"])

     print("Running cv_and_train_command with args: %s " % str(args))
--
GitLab
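
As an aside on the comment added to cross_validation.py: `_replace` is part of the standard `collections.namedtuple` API. The sketch below illustrates that behavior with a made-up `ResultFold` tuple and field names; these are for illustration only and are not the actual types used in mhcflurry.

```
from collections import namedtuple

# Hypothetical stand-in for the fold tuple used in cross_validation.py.
ResultFold = namedtuple("ResultFold", ["allele", "train", "imputed_train"])

fold = ResultFold(allele="HLA-A*02:01", train="train-data", imputed_train=None)

# _replace returns a *new* tuple with imputed_train updated; every other
# field keeps its original value, and the original tuple is untouched.
updated = fold._replace(imputed_train="imputed-data")

print(fold.imputed_train)     # None
print(updated.imputed_train)  # imputed-data
print(updated.allele)         # HLA-A*02:01
```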