From 33dd8ee85e1bed4b70285e095deb7aa3602174a4 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Mon, 9 Jan 2017 19:32:25 -0500
Subject: [PATCH] fixes for kubeface-based model training

---
 .../models_class1_allele_specific_single/README.md    | 14 ++++++++------
 .../models_class1_allele_specific_single/models.py    |  5 ++---
 2 files changed, 10 insertions(+), 9 deletions(-)
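
A note on the models.py change: re-enabling impute and fraction_negative
multiplies out the hyperparameter grid. Assuming models_grid takes the
Cartesian product of the per-parameter value lists (an assumption; see
mhcflurry.class1_allele_specific.train for the actual behavior), the grid
grows from 27 to 162 models, so the script's "Models: %d" line should now
print 162. A quick sanity check in plain Python:

    from itertools import product

    # Restatement of the grid below, for counting only (illustrative).
    grid = {
        "impute": [False, True],
        "activation": ["tanh"],
        "layer_sizes": [[12], [64], [128]],
        "embedding_output_dim": [8, 32, 64],
        "dropout_probability": [0, .1, .25],
        "fraction_negative": [0, .1, .2],
        "n_training_epochs": [250],
    }
    print(len(list(product(*grid.values()))))  # 2*1*3*3*3*3*1 = 162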

diff --git a/downloads-generation/models_class1_allele_specific_single/README.md b/downloads-generation/models_class1_allele_specific_single/README.md
index 58b6880f..12046a00 100644
--- a/downloads-generation/models_class1_allele_specific_single/README.md
+++ b/downloads-generation/models_class1_allele_specific_single/README.md
@@ -11,10 +11,13 @@ To generate this download we run:
 ```
-# If you are running dask distributed using our kubernetes config, you can use the DASK_IP one liner below.
-# Otherwise, just set it to the IP of the dask scheduler.
-DASK_IP=$(kubectl get service | grep daskd-scheduler | tr -s ' ' | cut -d ' ' -f 3)
+# This invocation runs the training tasks on a kubernetes cluster via kubeface.
+# Adjust the storage prefix, worker image, and task resources to match your cluster.
 ./GENERATE.sh \
-    --joblib-num-jobs 100 \
-    --joblib-pre-dispatch all \
     --cv-folds-per-task 10 \
-    --dask-scheduler $DASK_IP:8786
+    --backend kubernetes \
+    --storage-prefix gs://kubeface \
+    --worker-image hammerlab/mhcflurry:latest \
+    --kubernetes-task-resources-memory-mb 10000 \
+    --worker-path-prefix venv-py3/bin \
+    --max-simultaneous-tasks 200
+
 ```
diff --git a/downloads-generation/models_class1_allele_specific_single/models.py b/downloads-generation/models_class1_allele_specific_single/models.py
index 6375cd45..30f8e3d5 100644
--- a/downloads-generation/models_class1_allele_specific_single/models.py
+++ b/downloads-generation/models_class1_allele_specific_single/models.py
@@ -3,13 +3,12 @@ from mhcflurry.class1_allele_specific.train import HYPERPARAMETER_DEFAULTS
 import json
 
 models = HYPERPARAMETER_DEFAULTS.models_grid(
-    #impute=[False, True],
-    impute=[False],
+    impute=[False, True],
     activation=["tanh"],
     layer_sizes=[[12], [64], [128]],
     embedding_output_dim=[8, 32, 64],
     dropout_probability=[0, .1, .25],
-    # fraction_negative=[0, .1, .2],
+    fraction_negative=[0, .1, .2],
     n_training_epochs=[250])
 
 sys.stderr.write("Models: %d\n" % len(models))
-- 
GitLab