From ac3f874fa90dc032bf592b2f364a5564c9cd8373 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Sat, 25 Nov 2017 11:13:12 -0500
Subject: [PATCH] Fix tests, add logging to calibrate_percentile_ranks, add
 SIGUSR1 trick to train command

---
 .../class1_affinity_predictor.py              | 27 ++++++++++++++++---
 .../class1_neural_network.py                  |  2 +-
 .../train_allele_specific_models_command.py   |  6 +++++
 test/test_class1_affinity_predictor.py        | 12 +++------
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
index c0ec13fb..e61d46f7 100644
--- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
@@ -484,9 +484,10 @@ class Class1AffinityPredictor(object):
     def calibrate_percentile_ranks(
             self,
             peptides=None,
-            num_peptides_per_length=int(1e6),
+            num_peptides_per_length=int(1e5),
             alleles=None,
-            bins=None):
+            bins=None,
+            quiet=False):
         """
         Compute the cumulative distribution of ic50 values for a set of alleles
         over a large universe of random peptides, to enable computing quantiles in
@@ -503,6 +504,12 @@ class Class1AffinityPredictor(object):
         alleles : sequence of string, optional
             Alleles to perform calibration for. If not specified all supported
             alleles will be calibrated.
+        bins : object, optional
+            Anything that can be passed to numpy.histogram's "bins" argument
+            can be used here, i.e. either an integer or a sequence giving bin
+            edges. Values are in ic50 space.
+        quiet : boolean, optional
+            If False (default), status updates will be printed to stdout.
         """
         if bins is None:
             bins = to_ic50(numpy.linspace(1, 0, 1000))
@@ -519,11 +526,25 @@ class Class1AffinityPredictor(object):
                 peptides.extend(
                     random_peptides(num_peptides_per_length, length))
 
-        for allele in alleles:
+        if quiet:
+            def msg(s):
+                pass
+        else:
+            def msg(s):
+                print(s)
+
+        for (i, allele) in enumerate(alleles):
+            msg("Calibrating percentile ranks for allele %03d/%03d: %s" % (
+                i + 1, len(alleles), allele))
+            start = time.time()
             predictions = self.predict(peptides, allele=allele)
+            msg("Generated %d predictions in %0.2f sec." % (
+                len(predictions), time.time() - start))
             transform = PercentRankTransform()
             transform.fit(predictions, bins=bins)
             self.allele_to_percent_rank_transform[allele] = transform
+            msg("Done calibrating allele %s in %0.2f sec." % (
+                allele, time.time() - start))
 
     def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True):
         """
diff --git a/mhcflurry/class1_affinity_prediction/class1_neural_network.py b/mhcflurry/class1_affinity_prediction/class1_neural_network.py
index d685669c..f37827f3 100644
--- a/mhcflurry/class1_affinity_prediction/class1_neural_network.py
+++ b/mhcflurry/class1_affinity_prediction/class1_neural_network.py
@@ -455,7 +455,7 @@ class Class1NeuralNetwork(object):
             for (key, value) in fit_history.history.items():
                 self.loss_history[key].extend(value)
 
-            logging.info(
+            print(
                 "Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % (
                     i,
                     self.hyperparameters['max_epochs'],
diff --git a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
index aa12add4..bdc90dd8 100644
--- a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
+++ b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py
@@ -7,6 +7,8 @@ import sys
 import argparse
 import yaml
 import time
+import signal
+import traceback
 
 import pandas
 
@@ -65,6 +67,10 @@ parser.add_argument(
 
 
 def run(argv=sys.argv[1:]):
+    # On SIGUSR1, print the current stack trace.
+    print("To show stack trace, run:\nkill -s USR1 %d" % os.getpid())
+    signal.signal(signal.SIGUSR1, lambda sig, frame: traceback.print_stack())
+
     args = parser.parse_args(argv)
 
     configure_logging(verbose=args.verbosity > 1)
diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py
index 952e4840..3ab6277c 100644
--- a/test/test_class1_affinity_predictor.py
+++ b/test/test_class1_affinity_predictor.py
@@ -59,7 +59,7 @@ def test_a1_known_epitopes_in_newly_trained_model():
     ]
 
     hyperparameters = {
-        "max_epochs": 500,
+        "max_epochs": 100,
         "patience": 10,
         "early_stopping": True,
         "validation_split": 0.2,
@@ -67,15 +67,11 @@ def test_a1_known_epitopes_in_newly_trained_model():
         "random_negative_rate": 0.0,
         "random_negative_constant": 25,
 
+        "peptide_amino_acid_encoding": "BLOSUM62",
         "use_embedding": False,
         "kmer_size": 15,
         "batch_normalization": False,
         "locally_connected_layers": [
-            {
-                "filters": 8,
-                "activation": "tanh",
-                "kernel_size": 3
-            },
             {
                 "filters": 8,
                 "activation": "tanh",
@@ -129,7 +125,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
     hyperparameters = dict(
         activation="tanh",
         layer_sizes=[64],
-        max_epochs=500,
+        max_epochs=100,
         early_stopping=False,
         validation_split=0.0,
         locally_connected_layers=[],
@@ -163,7 +159,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
         peptides=df.peptide.values,
         affinities=df.measurement_value.values,
     )
-    predictor.calibrate_percentile_ranks()
+    predictor.calibrate_percentile_ranks(num_peptides_per_length=1000)
     ic50_pred = predictor.predict(df.peptide.values, allele=allele)
     ic50_true = df.measurement_value.values
     eq_(len(ic50_pred), len(ic50_true))
-- 
GitLab