I am trying to implement my own custom loss function for an RNN (LSTM) in Keras. Here is my code.
import sys
sys.path.insert(0, "C:\\Users\\skaul\\AppData\\Local\\Continuum\\Anaconda3\\envs\\tensorflow\\Lib\\site-packages")
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
import keras.backend as K
timesteps = 10
data_dim = 5
num_classes = 2
# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(32, return_sequences=True,
               input_shape=(timesteps, data_dim)))  # returns a sequence of vectors of dimension 32
model.add(LSTM(32, return_sequences=True)) # returns a sequence of vectors of dimension 32
model.add(LSTM(32)) # return a single vector of dimension 32
model.add(Dense(2, activation='softmax'))
def custom_loss(y_true, y_pred):
    ytrue = K.argmax(y_true, axis=1)
    ypred = K.argmax(y_pred, axis=1)
    true1 = ytrue
    pred1 = ypred
    pred0 = ypred - K.cast(K.variable(1), dtype='int64')
    pred0 = pred0 * K.cast(K.variable(-1), dtype='int64')
    tp = K.sum(true1 * pred1)  # true positives
    fn = K.sum(true1 * pred0)  # false negatives
    return K.cast(fn / tp, dtype='float32')
model.compile(loss=custom_loss,
              optimizer='adam',
              metrics=['accuracy'])
# Generate dummy training data
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))
# Generate dummy validation data
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))
y_a = np.random.random(y_train.shape)
y_b = np.random.random(y_train.shape)
out1 = K.eval(custom_loss(K.variable(y_a), K.variable(y_b)))
print(out1)
model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))
I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-0551e4a8e8ed> in <module>()
52 print(out1)
53
---> 54 model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
868 class_weight=class_weight,
869 sample_weight=sample_weight,
--> 870 initial_epoch=initial_epoch)
871
872 def evaluate(self, x, y, batch_size=32, verbose=1,
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
1488 else:
1489 ins = x + y + sample_weights
-> 1490 self._make_train_function()
1491 f = self.train_function
1492
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\engine\training.py in _make_train_function(self)
1012 self._collected_trainable_weights,
1013 self.constraints,
-> 1014 self.total_loss)
1015 updates = self.updates + training_updates
1016 # Gets loss and metrics. Updates weights at each call.
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\Lib\site-packages\keras\optimizers.py in get_updates(self, params, constraints, loss)
420
421 for p, g, m, v in zip(params, grads, ms, vs):
--> 422 m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
423 v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
424 p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\ops\math_ops.py in binary_op_wrapper(x, y)
827 if not isinstance(y, sparse_tensor.SparseTensor):
828 try:
--> 829 y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
830 except TypeError:
831 # If the RHS is not a tensor, it might be a tensor aware object
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
674 name=name,
675 preferred_dtype=preferred_dtype,
--> 676 as_ref=False)
677
678
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype)
739
740 if ret is None:
--> 741 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
742
743 if ret is NotImplemented:
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
111 as_ref=False):
112 _ = as_ref
--> 113 return constant(v, dtype=dtype, name=name)
114
115
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name, verify_shape)
100 tensor_value = attr_value_pb2.AttrValue()
101 tensor_value.tensor.CopyFrom(
--> 102 tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
103 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
104 const_tensor = g.create_op(
c:\users\skaul\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
362 else:
363 if values is None:
--> 364 raise ValueError("None values not supported.")
365 # if dtype is provided, forces numpy array to be the type
366 # provided if possible.
ValueError: None values not supported.
This leads me to believe that my loss function is returning None. However, my code prints 0.941634 before the error shown above appears; that value comes from the statement print(out1), which tests the loss function outside the RNN. Any ideas about what might be going wrong?
Answer 0 (score: 0)
You can use tf.Print(z, [z]) (where z is your variable) to print every variable in your custom loss before the return statement. You will then know what values they hold just before the final return statement executes, and the problem should become obvious.
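For example, here is a minimal sketch of instrumenting the loss this way (assuming a TensorFlow 1.x backend, where tf.Print is an identity op that logs its inputs to stderr; custom_loss_debug is just an illustrative name):

import tensorflow as tf
import keras.backend as K

def custom_loss_debug(y_true, y_pred):
    ytrue = K.argmax(y_true, axis=1)
    ypred = K.argmax(y_pred, axis=1)
    tp = K.sum(ytrue * ypred)        # true positives
    fn = K.sum(ytrue * (1 - ypred))  # false negatives
    # tf.Print returns tp unchanged but logs the listed tensors to stderr
    # every time this node is evaluated in the graph.
    tp = tf.Print(tp, [tp, fn], message='tp, fn = ')
    return K.cast(fn / tp, dtype='float32')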
Answer 1 (score: 0)
First, I would not recommend using `fn/tp` as a loss function, since it can produce NaN values whenever tp = 0. More importantly, the loss is built entirely from K.argmax, which has no gradient, so the gradient of the loss with respect to the weights is None; that None is exactly what the Adam optimizer trips over in the traceback above. I therefore suggest using the custom function only as an evaluation metric to monitor, while training with a standard differentiable loss:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', custom_loss])
Second, I was unable to reproduce your error, so I am not sure whether this idea will solve your problem. Try replacing the way you compute tp and fn with TensorFlow's logical and reduce_sum functions:
import tensorflow as tf

def custom_loss(y_true, y_pred):
    ytrue = K.argmax(y_true, axis=1)
    ypred = K.argmax(y_pred, axis=1)
    ypred_bool = tf.equal(ypred > 0, True)
    ytrue_bool = tf.equal(ytrue > 0, True)
    tp = tf.reduce_sum(tf.cast(tf.logical_and(ypred_bool, ytrue_bool), dtype=tf.float32), axis=0)  # true positives
    fn = tf.reduce_sum(tf.cast(tf.logical_and(tf.logical_not(ypred_bool), ytrue_bool), dtype=tf.float32), axis=0)  # false negatives
    return K.cast(fn / tp, dtype='float32')
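As a quick sanity check, you can evaluate the revised function outside the model, mirroring the print(out1) test from the question (a sketch; the dummy labels below are arbitrary made-up values):

import numpy as np
import keras.backend as K

y_a = np.array([[0., 1.], [0., 1.], [1., 0.], [0., 1.]])          # "true" one-hot rows
y_b = np.array([[0.2, 0.8], [0.9, 0.1], [0.7, 0.3], [0.4, 0.6]])  # predicted rows
out1 = K.eval(custom_loss(K.variable(y_a), K.variable(y_b)))
print(out1)  # tp = 2 and fn = 1 for these dummies, so this prints 0.5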