Skip to content
Snippets Groups Projects
Commit ac3f874f authored by Tim O'Donnell
Browse files

Fix tests, add logging to calibrate_percentile_ranks, add SIGUSR1 trick to train command

parent 72362eb2
No related branches found
No related tags found
No related merge requests found
...@@ -484,9 +484,10 @@ class Class1AffinityPredictor(object): ...@@ -484,9 +484,10 @@ class Class1AffinityPredictor(object):
def calibrate_percentile_ranks( def calibrate_percentile_ranks(
self, self,
peptides=None, peptides=None,
num_peptides_per_length=int(1e6), num_peptides_per_length=int(1e5),
alleles=None, alleles=None,
bins=None): bins=None,
quiet=False):
""" """
Compute the cumulative distribution of ic50 values for a set of alleles Compute the cumulative distribution of ic50 values for a set of alleles
over a large universe of random peptides, to enable computing quantiles in over a large universe of random peptides, to enable computing quantiles in
...@@ -503,6 +504,12 @@ class Class1AffinityPredictor(object): ...@@ -503,6 +504,12 @@ class Class1AffinityPredictor(object):
alleles : sequence of string, optional alleles : sequence of string, optional
Alleles to perform calibration for. If not specified all supported Alleles to perform calibration for. If not specified all supported
alleles will be calibrated. alleles will be calibrated.
bins : object
Anything that can be passed to numpy.histogram's "bins" argument
can be used here, i.e. either an integer or a sequence giving bin
edges. This is in ic50 space.
quiet : boolean
If False (default), status updates will be printed to stdout.
""" """
if bins is None: if bins is None:
bins = to_ic50(numpy.linspace(1, 0, 1000)) bins = to_ic50(numpy.linspace(1, 0, 1000))
...@@ -519,11 +526,25 @@ class Class1AffinityPredictor(object): ...@@ -519,11 +526,25 @@ class Class1AffinityPredictor(object):
peptides.extend( peptides.extend(
random_peptides(num_peptides_per_length, length)) random_peptides(num_peptides_per_length, length))
for allele in alleles: if quiet:
def msg(s):
pass
else:
def msg(s):
print(s)
for (i, allele) in enumerate(alleles):
msg("Calibrating percentile ranks for allele %03d/%03d: %s" % (
i + 1, len(alleles), allele))
start = time.time()
predictions = self.predict(peptides, allele=allele) predictions = self.predict(peptides, allele=allele)
msg("Generated %d predictions in %0.2f sec." % (
len(predictions), time.time() - start))
transform = PercentRankTransform() transform = PercentRankTransform()
transform.fit(predictions, bins=bins) transform.fit(predictions, bins=bins)
self.allele_to_percent_rank_transform[allele] = transform self.allele_to_percent_rank_transform[allele] = transform
msg("Done calibrating allele %s in %0.2f sec." % (
allele, time.time() - start))
def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True): def percentile_ranks(self, affinities, allele=None, alleles=None, throw=True):
""" """
......
...@@ -455,7 +455,7 @@ class Class1NeuralNetwork(object): ...@@ -455,7 +455,7 @@ class Class1NeuralNetwork(object):
for (key, value) in fit_history.history.items(): for (key, value) in fit_history.history.items():
self.loss_history[key].extend(value) self.loss_history[key].extend(value)
logging.info( print(
"Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % ( "Epoch %3d / %3d: loss=%g. Min val loss at epoch %s" % (
i, i,
self.hyperparameters['max_epochs'], self.hyperparameters['max_epochs'],
......
...@@ -7,6 +7,8 @@ import sys ...@@ -7,6 +7,8 @@ import sys
import argparse import argparse
import yaml import yaml
import time import time
import signal
import traceback
import pandas import pandas
...@@ -65,6 +67,10 @@ parser.add_argument( ...@@ -65,6 +67,10 @@ parser.add_argument(
def run(argv=sys.argv[1:]): def run(argv=sys.argv[1:]):
# On sigusr1 print stack trace
print("To show stack trace, run:\nkill -s USR1 %d" % os.getpid())
signal.signal(signal.SIGUSR1, lambda sig, frame: traceback.print_stack())
args = parser.parse_args(argv) args = parser.parse_args(argv)
configure_logging(verbose=args.verbosity > 1) configure_logging(verbose=args.verbosity > 1)
......
...@@ -59,7 +59,7 @@ def test_a1_known_epitopes_in_newly_trained_model(): ...@@ -59,7 +59,7 @@ def test_a1_known_epitopes_in_newly_trained_model():
] ]
hyperparameters = { hyperparameters = {
"max_epochs": 500, "max_epochs": 100,
"patience": 10, "patience": 10,
"early_stopping": True, "early_stopping": True,
"validation_split": 0.2, "validation_split": 0.2,
...@@ -67,15 +67,11 @@ def test_a1_known_epitopes_in_newly_trained_model(): ...@@ -67,15 +67,11 @@ def test_a1_known_epitopes_in_newly_trained_model():
"random_negative_rate": 0.0, "random_negative_rate": 0.0,
"random_negative_constant": 25, "random_negative_constant": 25,
"peptide_amino_acid_encoding": "BLOSUM62",
"use_embedding": False, "use_embedding": False,
"kmer_size": 15, "kmer_size": 15,
"batch_normalization": False, "batch_normalization": False,
"locally_connected_layers": [ "locally_connected_layers": [
{
"filters": 8,
"activation": "tanh",
"kernel_size": 3
},
{ {
"filters": 8, "filters": 8,
"activation": "tanh", "activation": "tanh",
...@@ -129,7 +125,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): ...@@ -129,7 +125,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
hyperparameters = dict( hyperparameters = dict(
activation="tanh", activation="tanh",
layer_sizes=[64], layer_sizes=[64],
max_epochs=500, max_epochs=100,
early_stopping=False, early_stopping=False,
validation_split=0.0, validation_split=0.0,
locally_connected_layers=[], locally_connected_layers=[],
...@@ -163,7 +159,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data(): ...@@ -163,7 +159,7 @@ def test_class1_affinity_predictor_a0205_memorize_training_data():
peptides=df.peptide.values, peptides=df.peptide.values,
affinities=df.measurement_value.values, affinities=df.measurement_value.values,
) )
predictor.calibrate_percentile_ranks() predictor.calibrate_percentile_ranks(num_peptides_per_length=1000)
ic50_pred = predictor.predict(df.peptide.values, allele=allele) ic50_pred = predictor.predict(df.peptide.values, allele=allele)
ic50_true = df.measurement_value.values ic50_true = df.measurement_value.values
eq_(len(ic50_pred), len(ic50_true)) eq_(len(ic50_pred), len(ic50_true))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment