Skip to content
Snippets Groups Projects
Commit b5f9a292 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

fixes

parent e9b2576e
No related branches found
No related tags found
No related merge requests found
...@@ -58,9 +58,12 @@ class BatchPlan(object): ...@@ -58,9 +58,12 @@ class BatchPlan(object):
for indices in self.batch_indices_generator(epochs=epochs): for indices in self.batch_indices_generator(epochs=epochs):
batch_x_dict = {} batch_x_dict = {}
for (item, value) in x_dict.items(): for (item, value) in x_dict.items():
assert not numpy.isnan(value[indices]).any(), (item, value)
batch_x_dict[item] = value[indices] batch_x_dict[item] = value[indices]
batch_y_list = [] batch_y_list = []
for value in y_list: for value in y_list:
assert not numpy.isnan(value[indices]).any(), (
len(batch_y_list), value)
batch_y_list.append(value[indices]) batch_y_list.append(value[indices])
yield (batch_x_dict, batch_y_list) yield (batch_x_dict, batch_y_list)
......
...@@ -17,10 +17,7 @@ from .random_negative_peptides import RandomNegativePeptides ...@@ -17,10 +17,7 @@ from .random_negative_peptides import RandomNegativePeptides
from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding
from .auxiliary_input import AuxiliaryInputEncoder from .auxiliary_input import AuxiliaryInputEncoder
from .batch_generator import MultiallelicMassSpecBatchGenerator from .batch_generator import MultiallelicMassSpecBatchGenerator
from .custom_loss import ( from .custom_loss import MSEWithInequalities, MultiallelicMassSpecLoss
MSEWithInequalities,
MultiallelicMassSpecLoss,
ZeroLoss)
class Class1PresentationNeuralNetwork(object): class Class1PresentationNeuralNetwork(object):
...@@ -97,6 +94,7 @@ class Class1PresentationNeuralNetwork(object): ...@@ -97,6 +94,7 @@ class Class1PresentationNeuralNetwork(object):
Dense, Dense,
Flatten, Flatten,
RepeatVector, RepeatVector,
Reshape,
concatenate, concatenate,
Activation, Activation,
Lambda, Lambda,
...@@ -178,13 +176,18 @@ class Class1PresentationNeuralNetwork(object): ...@@ -178,13 +176,18 @@ class Class1PresentationNeuralNetwork(object):
node = layer(input_nodes) node = layer(input_nodes)
layer_name_to_new_node[layer.name] = node layer_name_to_new_node[layer.name] = node
node = Reshape(
(self.hyperparameters['max_alleles'],),
name="unmasked_affinity_matrix_output")(node)
pre_mask_affinity_predictor_matrix_output = node pre_mask_affinity_predictor_matrix_output = node
# Apply allele mask: zero out all outputs corresponding to alleles # Apply allele mask: zero out all outputs corresponding to alleles
# with the special index 0. # with the special index 0.
def alleles_to_mask(x): def alleles_to_mask(x):
import keras.backend as K import keras.backend as K
return K.cast(K.expand_dims(K.not_equal(x, 0.0)), "float32") result = K.cast(K.not_equal(x, 0), "float32")
return result
allele_mask = Lambda(alleles_to_mask, name="allele_mask")(input_alleles) allele_mask = Lambda(alleles_to_mask, name="allele_mask")(input_alleles)
...@@ -193,12 +196,10 @@ class Class1PresentationNeuralNetwork(object): ...@@ -193,12 +196,10 @@ class Class1PresentationNeuralNetwork(object):
allele_mask, allele_mask,
pre_mask_affinity_predictor_matrix_output pre_mask_affinity_predictor_matrix_output
]) ])
node = Reshape(
# First allele (i.e. the first column of the alleles matrix) is given (self.hyperparameters['max_alleles'], 1),
# its own output. This is used for the affinity prediction loss. name="expand_dims_affinity_matrix_output")(
affinity_predictor_output = Lambda( affinity_predictor_matrix_output)
lambda x: x[:, 0], name="affinity_output")(
affinity_predictor_matrix_output)
auxiliary_input = None auxiliary_input = None
if self.hyperparameters['auxiliary_input_features']: if self.hyperparameters['auxiliary_input_features']:
...@@ -228,13 +229,20 @@ class Class1PresentationNeuralNetwork(object): ...@@ -228,13 +229,20 @@ class Class1PresentationNeuralNetwork(object):
bias_initializer=Zeros()) bias_initializer=Zeros())
lifted = TimeDistributed(layer, name="presentation_adjustment") lifted = TimeDistributed(layer, name="presentation_adjustment")
presentation_adjustment = lifted(node) presentation_adjustment = lifted(node)
presentation_adjustment = Reshape(
target_shape=(self.hyperparameters['max_alleles'],),
name="reshaped_presentation_adjustment")(presentation_adjustment)
def logit(x): def logit(x):
import tensorflow as tf import tensorflow as tf
return - tf.log(1. / x - 1.) return - tf.log(
tf.maximum(
tf.math.divide_no_nan(1., x) - 1.,
0.0))
presentation_output_pre_sigmoid = Add()([ presentation_output_pre_sigmoid = Add()([
Lambda(logit)(affinity_predictor_matrix_output), Lambda(logit, name="logit")(affinity_predictor_matrix_output),
presentation_adjustment, presentation_adjustment,
]) ])
pre_mask_presentation_output = Activation( pre_mask_presentation_output = Activation(
...@@ -254,9 +262,8 @@ class Class1PresentationNeuralNetwork(object): ...@@ -254,9 +262,8 @@ class Class1PresentationNeuralNetwork(object):
input_alleles, input_alleles,
] + ([] if auxiliary_input is None else [auxiliary_input]), ] + ([] if auxiliary_input is None else [auxiliary_input]),
outputs=[ outputs=[
affinity_predictor_output, affinity_predictor_matrix_output,
presentation_output, presentation_output,
affinity_predictor_matrix_output
], ],
name="presentation", name="presentation",
) )
...@@ -405,7 +412,12 @@ class Class1PresentationNeuralNetwork(object): ...@@ -405,7 +412,12 @@ class Class1PresentationNeuralNetwork(object):
y1, y1,
]) ])
affinities_loss = MSEWithInequalities() def keras_max(matrix):
import keras.backend as K
result = K.max(matrix, axis=1)
return result
affinities_loss = MSEWithInequalities(transform_function=keras_max)
encoded_y1 = affinities_loss.encode_y( encoded_y1 = affinities_loss.encode_y(
y1_with_random_negatives, y1_with_random_negatives,
inequalities=adjusted_inequalities_with_random_negative) inequalities=adjusted_inequalities_with_random_negative)
...@@ -426,9 +438,12 @@ class Class1PresentationNeuralNetwork(object): ...@@ -426,9 +438,12 @@ class Class1PresentationNeuralNetwork(object):
allele_representations_hash = self.set_allele_representations( allele_representations_hash = self.set_allele_representations(
allele_representations) allele_representations)
loss_reduction = "none"
self.network.compile( self.network.compile(
loss=[affinities_loss.keras_wrapped(), mms_loss.keras_wrapped(), ZeroLoss().keras_wrapped()], loss=[
#loss_weights=[1.0, 1.0, 1.0], affinities_loss.get_keras_loss(reduction=loss_reduction),
mms_loss.get_keras_loss(reduction=loss_reduction),
],
optimizer=self.hyperparameters['optimizer']) optimizer=self.hyperparameters['optimizer'])
if self.hyperparameters['learning_rate'] is not None: if self.hyperparameters['learning_rate'] is not None:
K.set_value( K.set_value(
...@@ -527,7 +542,7 @@ class Class1PresentationNeuralNetwork(object): ...@@ -527,7 +542,7 @@ class Class1PresentationNeuralNetwork(object):
(train_generator, test_generator) = ( (train_generator, test_generator) = (
batch_generator.get_train_and_test_generators( batch_generator.get_train_and_test_generators(
x_dict=x_dict_with_random_negatives, x_dict=x_dict_with_random_negatives,
y_list=[encoded_y1, encoded_y2, encoded_y2], y_list=[encoded_y1, encoded_y2],
epochs=1)) epochs=1))
self.assert_allele_representations_hash(allele_representations_hash) self.assert_allele_representations_hash(allele_representations_hash)
fit_history = self.network.fit_generator( fit_history = self.network.fit_generator(
...@@ -547,7 +562,6 @@ class Class1PresentationNeuralNetwork(object): ...@@ -547,7 +562,6 @@ class Class1PresentationNeuralNetwork(object):
fit_info[key].extend(value) fit_info[key].extend(value)
if numpy.isnan(fit_info['loss'][-1]): if numpy.isnan(fit_info['loss'][-1]):
import ipdb ; ipdb.set_trace()
raise ValueError("NaN loss") raise ValueError("NaN loss")
# Print progress no more often than once every few seconds. # Print progress no more often than once every few seconds.
...@@ -567,7 +581,6 @@ class Class1PresentationNeuralNetwork(object): ...@@ -567,7 +581,6 @@ class Class1PresentationNeuralNetwork(object):
last_progress_print = time.time() last_progress_print = time.time()
if batch_generator.num_test_batches: if batch_generator.num_test_batches:
#import ipdb ; ipdb.set_trace()
val_loss = fit_info['val_loss'][-1] val_loss = fit_info['val_loss'][-1]
if min_val_loss is None or ( if min_val_loss is None or (
val_loss < min_val_loss - val_loss < min_val_loss -
...@@ -609,7 +622,7 @@ class Class1PresentationNeuralNetwork(object): ...@@ -609,7 +622,7 @@ class Class1PresentationNeuralNetwork(object):
Predictions = collections.namedtuple( Predictions = collections.namedtuple(
"ligandone_neural_network_predictions", "ligandone_neural_network_predictions",
"score affinity") "affinity score")
def predict( def predict(
self, self,
...@@ -638,11 +651,8 @@ class Class1PresentationNeuralNetwork(object): ...@@ -638,11 +651,8 @@ class Class1PresentationNeuralNetwork(object):
feature_parameters=self.hyperparameters[ feature_parameters=self.hyperparameters[
'auxiliary_input_feature_parameters']) 'auxiliary_input_feature_parameters'])
predictions = self.Predictions._make([ predictions = self.Predictions._make(
numpy.squeeze(output) self.network.predict(x_dict, batch_size=batch_size))
for output in self.network.predict(
x_dict, batch_size=batch_size)[1:]
])
return predictions return predictions
def set_allele_representations(self, allele_representations): def set_allele_representations(self, allele_representations):
......
...@@ -18,24 +18,15 @@ import pandas ...@@ -18,24 +18,15 @@ import pandas
import mhcnames import mhcnames
from .hyperparameters import HyperparameterDefaults
from .version import __version__ from .version import __version__
from .class1_neural_network import Class1NeuralNetwork, DEFAULT_PREDICT_BATCH_SIZE from .class1_neural_network import Class1NeuralNetwork, DEFAULT_PREDICT_BATCH_SIZE
from .encodable_sequences import EncodableSequences from .encodable_sequences import EncodableSequences
from .regression_target import from_ic50, to_ic50 from .regression_target import from_ic50, to_ic50
from .random_negative_peptides import RandomNegativePeptides from .allele_encoding import MultipleAlleleEncoding
from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding
from .auxiliary_input import AuxiliaryInputEncoder
from .batch_generator import MultiallelicMassSpecBatchGenerator
from .custom_loss import (
MSEWithInequalities,
MultiallelicMassSpecLoss,
ZeroLoss)
from .downloads import get_default_class1_presentation_models_dir from .downloads import get_default_class1_presentation_models_dir
from .class1_presentation_neural_network import Class1PresentationNeuralNetwork from .class1_presentation_neural_network import Class1PresentationNeuralNetwork
from .common import save_weights, load_weights from .common import save_weights, load_weights
class Class1PresentationPredictor(object): class Class1PresentationPredictor(object):
def __init__( def __init__(
self, self,
......
...@@ -57,13 +57,15 @@ class Loss(object): ...@@ -57,13 +57,15 @@ class Loss(object):
def __str__(self): def __str__(self):
return "<Loss: %s>" % self.name return "<Loss: %s>" % self.name
def keras_wrapped(self, reduction="sum_over_batch_size"): def loss(self, y_true, y_pred):
raise NotImplementedError()
def get_keras_loss(self, reduction="sum_over_batch_size"):
from keras.losses import LossFunctionWrapper from keras.losses import LossFunctionWrapper
return LossFunctionWrapper( return LossFunctionWrapper(
self.loss, reduction=reduction, name=self.name) self.loss, reduction=reduction, name=self.name)
class StandardKerasLoss(Loss): class StandardKerasLoss(Loss):
""" """
A loss function supported by Keras, such as MSE. A loss function supported by Keras, such as MSE.
...@@ -109,6 +111,9 @@ class MSEWithInequalities(Loss): ...@@ -109,6 +111,9 @@ class MSEWithInequalities(Loss):
supports_inequalities = True supports_inequalities = True
supports_multiple_outputs = False supports_multiple_outputs = False
def __init__(self, transform_function=None):
self.transform_function = transform_function
@staticmethod @staticmethod
def encode_y(y, inequalities=None): def encode_y(y, inequalities=None):
y = array(y, dtype="float32") y = array(y, dtype="float32")
...@@ -133,13 +138,17 @@ class MSEWithInequalities(Loss): ...@@ -133,13 +138,17 @@ class MSEWithInequalities(Loss):
assert not isnan(encoded).any() assert not isnan(encoded).any()
return encoded return encoded
@staticmethod def loss(self, y_true, y_pred):
def loss(y_true, y_pred):
# We always delay import of Keras so that mhcflurry can be imported # We always delay import of Keras so that mhcflurry can be imported
# initially without tensorflow debug output, etc. # initially without tensorflow debug output, etc.
from keras import backend as K from keras import backend as K
import tensorflow as tf import tensorflow as tf
if self.transform_function:
y_pred = self.transform_function(y_pred)
y_true = K.squeeze(y_true, axis=-1)
# Handle (=) inequalities # Handle (=) inequalities
diff1 = y_pred - y_true diff1 = y_pred - y_true
diff1 *= K.cast(y_true >= 0.0, "float32") diff1 *= K.cast(y_true >= 0.0, "float32")
...@@ -160,7 +169,10 @@ class MSEWithInequalities(Loss): ...@@ -160,7 +169,10 @@ class MSEWithInequalities(Loss):
K.sum(K.square(diff1)) + K.sum(K.square(diff1)) +
K.sum(K.square(diff2)) + K.sum(K.square(diff2)) +
K.sum(K.square(diff3))) / K.cast(K.shape(y_pred)[0], "float32") K.sum(K.square(diff3))) / K.cast(K.shape(y_pred)[0], "float32")
return tf.where(tf.is_nan(result), tf.zeros_like(result), result)
return result
#return tf.where(tf.is_nan(result), tf.zeros_like(result), result)
class MSEWithInequalitiesAndMultipleOutputs(Loss): class MSEWithInequalitiesAndMultipleOutputs(Loss):
...@@ -188,6 +200,9 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss): ...@@ -188,6 +200,9 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
supports_inequalities = True supports_inequalities = True
supports_multiple_outputs = True supports_multiple_outputs = True
def __init__(self, transform_function=None):
self.transform_function = transform_function
@staticmethod @staticmethod
def encode_y(y, inequalities=None, output_indices=None): def encode_y(y, inequalities=None, output_indices=None):
y = array(y, dtype="float32") y = array(y, dtype="float32")
...@@ -211,10 +226,12 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss): ...@@ -211,10 +226,12 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
return encoded return encoded
@staticmethod def loss(self, y_true, y_pred):
def loss(y_true, y_pred):
from keras import backend as K from keras import backend as K
if self.transform_function:
y_pred = self.transform_function(y_pred)
y_true = K.flatten(y_true) y_true = K.flatten(y_true)
output_indices = y_true // 10 output_indices = y_true // 10
...@@ -236,7 +253,7 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss): ...@@ -236,7 +253,7 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
# ], axis=-1) # ], axis=-1)
#updated_y_pred = tf.gather_nd(y_pred, indexer) #updated_y_pred = tf.gather_nd(y_pred, indexer)
return MSEWithInequalities.loss(updated_y_true, updated_y_pred) return MSEWithInequalities().loss(updated_y_true, updated_y_pred)
class MultiallelicMassSpecLoss(Loss): class MultiallelicMassSpecLoss(Loss):
...@@ -261,10 +278,7 @@ class MultiallelicMassSpecLoss(Loss): ...@@ -261,10 +278,7 @@ class MultiallelicMassSpecLoss(Loss):
def loss(self, y_true, y_pred): def loss(self, y_true, y_pred):
import tensorflow as tf import tensorflow as tf
y_pred = tf.squeeze(y_pred, axis=-1)
y_true = tf.reshape(y_true, (-1,)) y_true = tf.reshape(y_true, (-1,))
pos = tf.boolean_mask(y_pred, tf.math.equal(y_true, 1.0)) pos = tf.boolean_mask(y_pred, tf.math.equal(y_true, 1.0))
pos_max = tf.reduce_max(pos, axis=1) pos_max = tf.reduce_max(pos, axis=1)
neg = tf.boolean_mask(y_pred, tf.math.equal(y_true, 0.0)) neg = tf.boolean_mask(y_pred, tf.math.equal(y_true, 0.0))
...@@ -274,23 +288,6 @@ class MultiallelicMassSpecLoss(Loss): ...@@ -274,23 +288,6 @@ class MultiallelicMassSpecLoss(Loss):
return tf.where(tf.is_nan(result), 0.0, result) return tf.where(tf.is_nan(result), 0.0, result)
class ZeroLoss(Loss):
"""
"""
name = "zero_loss"
supports_inequalities = False
supports_multiple_outputs = False
@staticmethod
def encode_y(y):
return y
@staticmethod
def loss(y_true, y_pred):
import keras.backend as K
return K.constant(0.0)
def check_shape(name, arr, expected_shape): def check_shape(name, arr, expected_shape):
""" """
Raise ValueError if arr.shape != expected_shape. Raise ValueError if arr.shape != expected_shape.
...@@ -308,7 +305,10 @@ def check_shape(name, arr, expected_shape): ...@@ -308,7 +305,10 @@ def check_shape(name, arr, expected_shape):
# Register custom losses. # Register custom losses.
for cls in [MSEWithInequalities, MSEWithInequalitiesAndMultipleOutputs, MultiallelicMassSpecLoss, ZeroLoss]: for cls in [
MSEWithInequalities,
MSEWithInequalitiesAndMultipleOutputs,
MultiallelicMassSpecLoss]:
CUSTOM_LOSSES[cls.name] = cls() CUSTOM_LOSSES[cls.name] = cls()
...@@ -2,7 +2,7 @@ six ...@@ -2,7 +2,7 @@ six
numpy>=1.11 numpy>=1.11
pandas>=0.20.3 pandas>=0.20.3
Keras>=2.2.5 Keras>=2.2.5
tensorflow>=1.1.0,<2.0.0 tensorflow>=1.15.0,<2.0.0
appdirs appdirs
scikit-learn scikit-learn
mhcnames mhcnames
......
...@@ -91,13 +91,16 @@ def teardown(): ...@@ -91,13 +91,16 @@ def teardown():
cleanup() cleanup()
def Xtest_basic(): def test_basic():
affinity_predictor = PAN_ALLELE_PREDICTOR_NO_MASS_SPEC affinity_predictor = PAN_ALLELE_PREDICTOR_NO_MASS_SPEC
models = [] models = []
for affinity_network in affinity_predictor.class1_pan_allele_models: for affinity_network in affinity_predictor.class1_pan_allele_models:
presentation_network = Class1PresentationNeuralNetwork( presentation_network = Class1PresentationNeuralNetwork(
max_epochs=1, optimizer="adam",
learning_rate=0.0001, random_negative_rate=0.0,
random_negative_constant=0,
max_epochs=5,
learning_rate=0.001,
batch_generator_batch_size=256) batch_generator_batch_size=256)
presentation_network.load_from_class1_neural_network(affinity_network) presentation_network.load_from_class1_neural_network(affinity_network)
print(presentation_network.network.get_config()) print(presentation_network.network.get_config())
...@@ -124,6 +127,8 @@ def Xtest_basic(): ...@@ -124,6 +127,8 @@ def Xtest_basic():
merged_df = pandas.merge( merged_df = pandas.merge(
df, df_predictor.set_index("peptide"), left_index=True, right_index=True) df, df_predictor.set_index("peptide"), left_index=True, right_index=True)
print(merged_df)
assert_allclose( assert_allclose(
merged_df["tightest_affinity"], merged_df["affinity"], rtol=1e-5) merged_df["tightest_affinity"], merged_df["affinity"], rtol=1e-5)
assert_allclose( assert_allclose(
...@@ -157,7 +162,10 @@ def Xtest_basic(): ...@@ -157,7 +162,10 @@ def Xtest_basic():
# Test that fitting a model changes the predictor but not the original model # Test that fitting a model changes the predictor but not the original model
train_df = pandas.DataFrame({ train_df = pandas.DataFrame({
"peptide": random_peptides(256), "peptide": numpy.concatenate([
random_peptides(256, length=length)
for length in range(8,16)
]),
}) })
train_df["allele"] = "HLA-A*02:20" train_df["allele"] = "HLA-A*02:20"
train_df["experiment"] = "experiment1" train_df["experiment"] = "experiment1"
...@@ -167,7 +175,7 @@ def Xtest_basic(): ...@@ -167,7 +175,7 @@ def Xtest_basic():
experiment_to_allele_list={ experiment_to_allele_list={
"experiment1": ["HLA-A*02:20", "HLA-A*02:01"], "experiment1": ["HLA-A*02:20", "HLA-A*02:01"],
}, },
allele_to_sequence=predictor4.allele_to_sequence) allele_to_sequence=predictor.allele_to_sequence)
model = predictor4.models[0] model = predictor4.models[0]
new_predictor = Class1PresentationPredictor( new_predictor = Class1PresentationPredictor(
models=[model], models=[model],
...@@ -182,7 +190,7 @@ def Xtest_basic(): ...@@ -182,7 +190,7 @@ def Xtest_basic():
train_df.peptide.values, train_df.peptide.values,
alleles=["HLA-A*02:20"]) alleles=["HLA-A*02:20"])
print(train_df) print(train_df)
import ipdb ; ipdb.set_trace() #import ipdb ; ipdb.set_trace()
def scramble_peptide(peptide): def scramble_peptide(peptide):
...@@ -447,7 +455,7 @@ def Xtest_real_data_multiallelic_refinement(max_epochs=10): ...@@ -447,7 +455,7 @@ def Xtest_real_data_multiallelic_refinement(max_epochs=10):
import ipdb ; ipdb.set_trace() import ipdb ; ipdb.set_trace()
def test_synthetic_allele_refinement_with_affinity_data( def Xtest_synthetic_allele_refinement_with_affinity_data(
max_epochs=10, include_affinities=False): max_epochs=10, include_affinities=False):
refine_allele = "HLA-C*01:02" refine_allele = "HLA-C*01:02"
alleles = [ alleles = [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment