From ac3f874fa90dc032bf592b2f364a5564c9cd8373 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Sat, 25 Nov 2017 11:13:12 -0500 Subject: [PATCH] Fix tests, add logging to calibrate_percentile_ranks, add SIGUSR1 trick to train command --- .../class1_affinity_predictor.py | 27 ++++++++++++++++--- .../class1_neural_network.py | 2 +- .../train_allele_specific_models_command.py | 6 +++++ test/test_class1_affinity_predictor.py | 12 +++------ 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py index c0ec13fb..e61d46f7 100644 --- a/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py +++ b/mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py @@ -484,9 +484,10 @@ class Class1AffinityPredictor(object): def calibrate_percentile_ranks( self, peptides=None, - num_peptides_per_length=int(1e6), + num_peptides_per_length=int(1e5), alleles=None, - bins=None): + bins=None, + quiet=False): """ Compute the cumulative distribution of ic50 values for a set of alleles over a large universe of random peptides, to enable computing quantiles in @@ -503,6 +504,12 @@ class Class1AffinityPredictor(object): alleles : sequence of string, optional Alleles to perform calibration for. If not specified all supported alleles will be calibrated. + bins : object + Anything that can be passed to numpy.histogram's "bins" argument + can be used here, i.e. either an integer or a sequence giving bin + edges. This is in ic50 space. + quiet : boolean + If False (default), status updates will be printed to stdout. """ if bins is None: bins = to_ic50(numpy.linspace(1, 0, 1000)) @@ -519,11 +526,25 @@ class Class1AffinityPredictor(object): peptides.extend( random_peptides(num_peptides_per_length, length)) - for allele in alleles: + if quiet: + def msg(s): + pass + else: + def msg(s): + print(s) + + for (i, allele) in enumerate(alleles): + msg("Calibrating percentile ranks for allele %03d/%03d: %s" % ( + i + 1, len(alleles), allele)) + start = time.time() predictions = self.predict(peptides, allele=allele) + msg("Generated %d predictions in %0.2f sec." % ( + len(predictions), time.time() - start)) transform = PercentRankTransform() transform.fit(predictions, bins=bins) self.allele_to_percent_rank_transform[allele] = transform + msg("Done calibrating allele %s in %0.2f sec." % ( + allele, time.time() - start)) def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True): """ diff --git a/mhcflurry/class1_affinity_prediction/class1_neural_network.py b/mhcflurry/class1_affinity_prediction/class1_neural_network.py index d685669c..f37827f3 100644 --- a/mhcflurry/class1_affinity_prediction/class1_neural_network.py +++ b/mhcflurry/class1_affinity_prediction/class1_neural_network.py @@ -455,7 +455,7 @@ class Class1NeuralNetwork(object): for (key, value) in fit_history.history.items(): self.loss_history[key].extend(value) - logging.info( + print( "Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % ( i, self.hyperparameters['max_epochs'], diff --git a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py index aa12add4..bdc90dd8 100644 --- a/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py +++ b/mhcflurry/class1_affinity_prediction/train_allele_specific_models_command.py @@ -7,6 +7,8 @@ import sys import argparse import yaml import time +import signal +import traceback import pandas @@ -65,6 +67,10 @@ parser.add_argument( def run(argv=sys.argv[1:]): + # On sigusr1 print stack trace + print("To show stack trace, run:\nkill -s USR1 %d" % os.getpid()) + signal.signal(signal.SIGUSR1, lambda sig, frame: traceback.print_stack()) + args = parser.parse_args(argv) configure_logging(verbose=args.verbosity > 1) diff --git a/test/test_class1_affinity_predictor.py b/test/test_class1_affinity_predictor.py index 952e4840..3ab6277c 100644 --- a/test/test_class1_affinity_predictor.py +++ b/test/test_class1_affinity_predictor.py @@ -59,7 +59,7 @@ def test_a1_known_epitopes_in_newly_trained_model(): ] hyperparameters = { - "max_epochs": 500, + "max_epochs": 100, "patience": 10, "early_stopping": True, "validation_split": 0.2, @@ -67,15 +67,11 @@ def test_a1_known_epitopes_in_newly_trained_model(): "random_negative_rate": 0.0, "random_negative_constant": 25, + "peptide_amino_acid_encoding": "BLOSUM62", "use_embedding": False, "kmer_size": 15, "batch_normalization": False, "locally_connected_layers": [ - { - "filters": 8, - "activation": "tanh", - "kernel_size": 3 - }, { "filters": 8, "activation": "tanh", @@ -129,7 +125,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): hyperparameters = dict( activation="tanh", layer_sizes=[64], - max_epochs=500, + max_epochs=100, early_stopping=False, validation_split=0.0, locally_connected_layers=[], @@ -163,7 +159,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): peptides=df.peptide.values, affinities=df.measurement_value.values, ) - predictor.calibrate_percentile_ranks() + predictor.calibrate_percentile_ranks(num_peptides_per_length=1000) ic50_pred = predictor.predict(df.peptide.values, allele=allele) ic50_true = df.measurement_value.values eq_(len(ic50_pred), len(ic50_true)) -- GitLab