Skip to content
Snippets Groups Projects
Commit b5f9a292 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

fixes

parent e9b2576e
No related branches found
No related tags found
No related merge requests found
......@@ -58,9 +58,12 @@ class BatchPlan(object):
for indices in self.batch_indices_generator(epochs=epochs):
batch_x_dict = {}
for (item, value) in x_dict.items():
assert not numpy.isnan(value[indices]).any(), (item, value)
batch_x_dict[item] = value[indices]
batch_y_list = []
for value in y_list:
assert not numpy.isnan(value[indices]).any(), (
len(batch_y_list), value)
batch_y_list.append(value[indices])
yield (batch_x_dict, batch_y_list)
......
......@@ -17,10 +17,7 @@ from .random_negative_peptides import RandomNegativePeptides
from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding
from .auxiliary_input import AuxiliaryInputEncoder
from .batch_generator import MultiallelicMassSpecBatchGenerator
from .custom_loss import (
MSEWithInequalities,
MultiallelicMassSpecLoss,
ZeroLoss)
from .custom_loss import MSEWithInequalities, MultiallelicMassSpecLoss
class Class1PresentationNeuralNetwork(object):
......@@ -97,6 +94,7 @@ class Class1PresentationNeuralNetwork(object):
Dense,
Flatten,
RepeatVector,
Reshape,
concatenate,
Activation,
Lambda,
......@@ -178,13 +176,18 @@ class Class1PresentationNeuralNetwork(object):
node = layer(input_nodes)
layer_name_to_new_node[layer.name] = node
node = Reshape(
(self.hyperparameters['max_alleles'],),
name="unmasked_affinity_matrix_output")(node)
pre_mask_affinity_predictor_matrix_output = node
# Apply allele mask: zero out all outputs corresponding to alleles
# with the special index 0.
def alleles_to_mask(x):
import keras.backend as K
return K.cast(K.expand_dims(K.not_equal(x, 0.0)), "float32")
result = K.cast(K.not_equal(x, 0), "float32")
return result
allele_mask = Lambda(alleles_to_mask, name="allele_mask")(input_alleles)
......@@ -193,12 +196,10 @@ class Class1PresentationNeuralNetwork(object):
allele_mask,
pre_mask_affinity_predictor_matrix_output
])
# First allele (i.e. the first column of the alleles matrix) is given
# its own output. This is used for the affinity prediction loss.
affinity_predictor_output = Lambda(
lambda x: x[:, 0], name="affinity_output")(
affinity_predictor_matrix_output)
node = Reshape(
(self.hyperparameters['max_alleles'], 1),
name="expand_dims_affinity_matrix_output")(
affinity_predictor_matrix_output)
auxiliary_input = None
if self.hyperparameters['auxiliary_input_features']:
......@@ -228,13 +229,20 @@ class Class1PresentationNeuralNetwork(object):
bias_initializer=Zeros())
lifted = TimeDistributed(layer, name="presentation_adjustment")
presentation_adjustment = lifted(node)
presentation_adjustment = Reshape(
target_shape=(self.hyperparameters['max_alleles'],),
name="reshaped_presentation_adjustment")(presentation_adjustment)
def logit(x):
import tensorflow as tf
return - tf.log(1. / x - 1.)
return - tf.log(
tf.maximum(
tf.math.divide_no_nan(1., x) - 1.,
0.0))
presentation_output_pre_sigmoid = Add()([
Lambda(logit)(affinity_predictor_matrix_output),
Lambda(logit, name="logit")(affinity_predictor_matrix_output),
presentation_adjustment,
])
pre_mask_presentation_output = Activation(
......@@ -254,9 +262,8 @@ class Class1PresentationNeuralNetwork(object):
input_alleles,
] + ([] if auxiliary_input is None else [auxiliary_input]),
outputs=[
affinity_predictor_output,
affinity_predictor_matrix_output,
presentation_output,
affinity_predictor_matrix_output
],
name="presentation",
)
......@@ -405,7 +412,12 @@ class Class1PresentationNeuralNetwork(object):
y1,
])
affinities_loss = MSEWithInequalities()
def keras_max(matrix):
import keras.backend as K
result = K.max(matrix, axis=1)
return result
affinities_loss = MSEWithInequalities(transform_function=keras_max)
encoded_y1 = affinities_loss.encode_y(
y1_with_random_negatives,
inequalities=adjusted_inequalities_with_random_negative)
......@@ -426,9 +438,12 @@ class Class1PresentationNeuralNetwork(object):
allele_representations_hash = self.set_allele_representations(
allele_representations)
loss_reduction = "none"
self.network.compile(
loss=[affinities_loss.keras_wrapped(), mms_loss.keras_wrapped(), ZeroLoss().keras_wrapped()],
#loss_weights=[1.0, 1.0, 1.0],
loss=[
affinities_loss.get_keras_loss(reduction=loss_reduction),
mms_loss.get_keras_loss(reduction=loss_reduction),
],
optimizer=self.hyperparameters['optimizer'])
if self.hyperparameters['learning_rate'] is not None:
K.set_value(
......@@ -527,7 +542,7 @@ class Class1PresentationNeuralNetwork(object):
(train_generator, test_generator) = (
batch_generator.get_train_and_test_generators(
x_dict=x_dict_with_random_negatives,
y_list=[encoded_y1, encoded_y2, encoded_y2],
y_list=[encoded_y1, encoded_y2],
epochs=1))
self.assert_allele_representations_hash(allele_representations_hash)
fit_history = self.network.fit_generator(
......@@ -547,7 +562,6 @@ class Class1PresentationNeuralNetwork(object):
fit_info[key].extend(value)
if numpy.isnan(fit_info['loss'][-1]):
import ipdb ; ipdb.set_trace()
raise ValueError("NaN loss")
# Print progress no more often than once every few seconds.
......@@ -567,7 +581,6 @@ class Class1PresentationNeuralNetwork(object):
last_progress_print = time.time()
if batch_generator.num_test_batches:
#import ipdb ; ipdb.set_trace()
val_loss = fit_info['val_loss'][-1]
if min_val_loss is None or (
val_loss < min_val_loss -
......@@ -609,7 +622,7 @@ class Class1PresentationNeuralNetwork(object):
Predictions = collections.namedtuple(
"ligandone_neural_network_predictions",
"score affinity")
"affinity score")
def predict(
self,
......@@ -638,11 +651,8 @@ class Class1PresentationNeuralNetwork(object):
feature_parameters=self.hyperparameters[
'auxiliary_input_feature_parameters'])
predictions = self.Predictions._make([
numpy.squeeze(output)
for output in self.network.predict(
x_dict, batch_size=batch_size)[1:]
])
predictions = self.Predictions._make(
self.network.predict(x_dict, batch_size=batch_size))
return predictions
def set_allele_representations(self, allele_representations):
......
......@@ -18,24 +18,15 @@ import pandas
import mhcnames
from .hyperparameters import HyperparameterDefaults
from .version import __version__
from .class1_neural_network import Class1NeuralNetwork, DEFAULT_PREDICT_BATCH_SIZE
from .encodable_sequences import EncodableSequences
from .regression_target import from_ic50, to_ic50
from .random_negative_peptides import RandomNegativePeptides
from .allele_encoding import MultipleAlleleEncoding, AlleleEncoding
from .auxiliary_input import AuxiliaryInputEncoder
from .batch_generator import MultiallelicMassSpecBatchGenerator
from .custom_loss import (
MSEWithInequalities,
MultiallelicMassSpecLoss,
ZeroLoss)
from .allele_encoding import MultipleAlleleEncoding
from .downloads import get_default_class1_presentation_models_dir
from .class1_presentation_neural_network import Class1PresentationNeuralNetwork
from .common import save_weights, load_weights
class Class1PresentationPredictor(object):
def __init__(
self,
......
......@@ -57,13 +57,15 @@ class Loss(object):
def __str__(self):
return "<Loss: %s>" % self.name
def keras_wrapped(self, reduction="sum_over_batch_size"):
def loss(self, y_true, y_pred):
raise NotImplementedError()
def get_keras_loss(self, reduction="sum_over_batch_size"):
from keras.losses import LossFunctionWrapper
return LossFunctionWrapper(
self.loss, reduction=reduction, name=self.name)
class StandardKerasLoss(Loss):
"""
A loss function supported by Keras, such as MSE.
......@@ -109,6 +111,9 @@ class MSEWithInequalities(Loss):
supports_inequalities = True
supports_multiple_outputs = False
def __init__(self, transform_function=None):
self.transform_function = transform_function
@staticmethod
def encode_y(y, inequalities=None):
y = array(y, dtype="float32")
......@@ -133,13 +138,17 @@ class MSEWithInequalities(Loss):
assert not isnan(encoded).any()
return encoded
@staticmethod
def loss(y_true, y_pred):
def loss(self, y_true, y_pred):
# We always delay import of Keras so that mhcflurry can be imported
# initially without tensorflow debug output, etc.
from keras import backend as K
import tensorflow as tf
if self.transform_function:
y_pred = self.transform_function(y_pred)
y_true = K.squeeze(y_true, axis=-1)
# Handle (=) inequalities
diff1 = y_pred - y_true
diff1 *= K.cast(y_true >= 0.0, "float32")
......@@ -160,7 +169,10 @@ class MSEWithInequalities(Loss):
K.sum(K.square(diff1)) +
K.sum(K.square(diff2)) +
K.sum(K.square(diff3))) / K.cast(K.shape(y_pred)[0], "float32")
return tf.where(tf.is_nan(result), tf.zeros_like(result), result)
return result
#return tf.where(tf.is_nan(result), tf.zeros_like(result), result)
class MSEWithInequalitiesAndMultipleOutputs(Loss):
......@@ -188,6 +200,9 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
supports_inequalities = True
supports_multiple_outputs = True
def __init__(self, transform_function=None):
self.transform_function = transform_function
@staticmethod
def encode_y(y, inequalities=None, output_indices=None):
y = array(y, dtype="float32")
......@@ -211,10 +226,12 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
return encoded
@staticmethod
def loss(y_true, y_pred):
def loss(self, y_true, y_pred):
from keras import backend as K
if self.transform_function:
y_pred = self.transform_function(y_pred)
y_true = K.flatten(y_true)
output_indices = y_true // 10
......@@ -236,7 +253,7 @@ class MSEWithInequalitiesAndMultipleOutputs(Loss):
# ], axis=-1)
#updated_y_pred = tf.gather_nd(y_pred, indexer)
return MSEWithInequalities.loss(updated_y_true, updated_y_pred)
return MSEWithInequalities().loss(updated_y_true, updated_y_pred)
class MultiallelicMassSpecLoss(Loss):
......@@ -261,10 +278,7 @@ class MultiallelicMassSpecLoss(Loss):
def loss(self, y_true, y_pred):
import tensorflow as tf
y_pred = tf.squeeze(y_pred, axis=-1)
y_true = tf.reshape(y_true, (-1,))
pos = tf.boolean_mask(y_pred, tf.math.equal(y_true, 1.0))
pos_max = tf.reduce_max(pos, axis=1)
neg = tf.boolean_mask(y_pred, tf.math.equal(y_true, 0.0))
......@@ -274,23 +288,6 @@ class MultiallelicMassSpecLoss(Loss):
return tf.where(tf.is_nan(result), 0.0, result)
class ZeroLoss(Loss):
"""
"""
name = "zero_loss"
supports_inequalities = False
supports_multiple_outputs = False
@staticmethod
def encode_y(y):
return y
@staticmethod
def loss(y_true, y_pred):
import keras.backend as K
return K.constant(0.0)
def check_shape(name, arr, expected_shape):
"""
Raise ValueError if arr.shape != expected_shape.
......@@ -308,7 +305,10 @@ def check_shape(name, arr, expected_shape):
# Register custom losses.
for cls in [MSEWithInequalities, MSEWithInequalitiesAndMultipleOutputs, MultiallelicMassSpecLoss, ZeroLoss]:
for cls in [
MSEWithInequalities,
MSEWithInequalitiesAndMultipleOutputs,
MultiallelicMassSpecLoss]:
CUSTOM_LOSSES[cls.name] = cls()
......@@ -2,7 +2,7 @@ six
numpy>=1.11
pandas>=0.20.3
Keras>=2.2.5
tensorflow>=1.1.0,<2.0.0
tensorflow>=1.15.0,<2.0.0
appdirs
scikit-learn
mhcnames
......
......@@ -91,13 +91,16 @@ def teardown():
cleanup()
def Xtest_basic():
def test_basic():
affinity_predictor = PAN_ALLELE_PREDICTOR_NO_MASS_SPEC
models = []
for affinity_network in affinity_predictor.class1_pan_allele_models:
presentation_network = Class1PresentationNeuralNetwork(
max_epochs=1,
learning_rate=0.0001,
optimizer="adam",
random_negative_rate=0.0,
random_negative_constant=0,
max_epochs=5,
learning_rate=0.001,
batch_generator_batch_size=256)
presentation_network.load_from_class1_neural_network(affinity_network)
print(presentation_network.network.get_config())
......@@ -124,6 +127,8 @@ def Xtest_basic():
merged_df = pandas.merge(
df, df_predictor.set_index("peptide"), left_index=True, right_index=True)
print(merged_df)
assert_allclose(
merged_df["tightest_affinity"], merged_df["affinity"], rtol=1e-5)
assert_allclose(
......@@ -157,7 +162,10 @@ def Xtest_basic():
# Test that fitting a model changes the predictor but not the original model
train_df = pandas.DataFrame({
"peptide": random_peptides(256),
"peptide": numpy.concatenate([
random_peptides(256, length=length)
for length in range(8,16)
]),
})
train_df["allele"] = "HLA-A*02:20"
train_df["experiment"] = "experiment1"
......@@ -167,7 +175,7 @@ def Xtest_basic():
experiment_to_allele_list={
"experiment1": ["HLA-A*02:20", "HLA-A*02:01"],
},
allele_to_sequence=predictor4.allele_to_sequence)
allele_to_sequence=predictor.allele_to_sequence)
model = predictor4.models[0]
new_predictor = Class1PresentationPredictor(
models=[model],
......@@ -182,7 +190,7 @@ def Xtest_basic():
train_df.peptide.values,
alleles=["HLA-A*02:20"])
print(train_df)
import ipdb ; ipdb.set_trace()
#import ipdb ; ipdb.set_trace()
def scramble_peptide(peptide):
......@@ -447,7 +455,7 @@ def Xtest_real_data_multiallelic_refinement(max_epochs=10):
import ipdb ; ipdb.set_trace()
def test_synthetic_allele_refinement_with_affinity_data(
def Xtest_synthetic_allele_refinement_with_affinity_data(
max_epochs=10, include_affinities=False):
refine_allele = "HLA-C*01:02"
alleles = [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment