我尝试使用 Tensorflow 后端在Keras上使用Deep语义相似度模型(DSSM):https://github.com/airalcorn2/Deep-Semantic-Similarity-Model/blob/master/deep_semantic_similarity_keras.py。该代码适用于使用Theano后端的Keras,并且没有任何错误。
为了让它在正确使用Tensorflow后端在Keras上运行,我将一些Tensor属性修改为:
-: backend.dot(x, backend.transpose(y)) / (x.norm(2) * y.norm(2))
+: backend.dot(backend.l2_normalize(x, 0), backend.transpose(backend.l2_normalize(y, 0)))
和
-: x.max(axis = 1)
+: backend.max(x, 1)
当我编译修改后的模型时, ValueError:logits和targets必须具有相同的形状(()vs(?,?))。我该如何避免错误?
这是修改后的代码
import keras
import numpy as np
from keras import backend
from keras.layers import Input, merge
from keras.layers.core import Dense, Lambda, Reshape
from keras.layers.convolutional import Convolution1D
from keras.models import Model
def R(vects):
(x, y) = vects
return backend.dot(backend.l2_normalize(x, 0), backend.transpose(backend.l2_normalize(y, 0)))
LETTER_GRAM_SIZE = 3
WINDOW_SIZE = 3
TOTAL_LETTER_GRAMS = int(3 * 1e4)
WORD_DEPTH = WINDOW_SIZE * TOTAL_LETTER_GRAMS
K = 300
L = 128
J = 4
FILTER_LENGTH = 1
query = Input(shape = (None, WORD_DEPTH))
pos_doc = Input(shape = (None, WORD_DEPTH))
neg_docs = [Input(shape = (None, WORD_DEPTH)) for j in range(J)]
query_conv = Convolution1D(K, FILTER_LENGTH, border_mode = "same", input_shape = (None, WORD_DEPTH), activation = "tanh")(query) # See equation (2).
query_max = Lambda(lambda x: backend.max(x, 1), output_shape = (K, ))(query_conv)
query_sem = Dense(L, activation = "tanh", input_dim = K)(query_max)
doc_conv = Convolution1D(K, FILTER_LENGTH, border_mode = "same", input_shape = (None, WORD_DEPTH), activation = "tanh")
doc_max = Lambda(lambda x: backend.max(x, 1), output_shape = (K, ))
doc_sem = Dense(L, activation = "tanh", input_dim = K)
pos_doc_conv = doc_conv(pos_doc)
neg_doc_convs = [doc_conv(neg_doc) for neg_doc in neg_docs]
pos_doc_max = doc_max(pos_doc_conv)
neg_doc_maxes = [doc_max(neg_doc_conv) for neg_doc_conv in neg_doc_convs]
pos_doc_sem = doc_sem(pos_doc_max)
neg_doc_sems = [doc_sem(neg_doc_max) for neg_doc_max in neg_doc_maxes]
R_layer = Lambda(R, output_shape = (1, ))
R_Q_D_p = R_layer([query_sem, pos_doc_sem])
R_Q_D_ns = [R_layer([query_sem, neg_doc_sem]) for neg_doc_sem in neg_doc_sems]
concat_Rs = merge([R_Q_D_p] + R_Q_D_ns, mode = "concat")
concat_Rs = Reshape((J + 1, 1))(concat_Rs)
weight = np.array([1]).reshape(1, 1, 1, 1)
with_gamma = Convolution1D(1, 1, border_mode = "same", input_shape = (J + 1, 1), activation = "linear", bias = False, weights = [weight])(concat_Rs)
exponentiated = Lambda(lambda x: backend.exp(x), output_shape = (J + 1, ))(with_gamma) # See equation (5).
exponentiated = Reshape((J + 1, ))(exponentiated)
prob = Lambda(lambda x: x[0][0] / backend.sum(x[0]), output_shape = (1, ))(exponentiated)
model = Model(input = [query, pos_doc] + neg_docs, output = prob)
model.compile(optimizer = "adadelta", loss = "binary_crossentropy")
和错误消息。
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/Users/jun/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in merge_with(self, other)
571 try:
--> 572 self.assert_same_rank(other)
573 new_dims = []
/Users/jun/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in assert_same_rank(self, other)
617 raise ValueError(
--> 618 "Shapes %s and %s must have the same rank" % (self, other))
619
ValueError: Shapes (?, ?) and () must have the same rank
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
/Users/jun/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/nn.py in sigmoid_cross_entropy_with_logits(logits, targets, name)
430 try:
--> 431 targets.get_shape().merge_with(logits.get_shape())
432 except ValueError:
/Users/jun/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in merge_with(self, other)
578 raise ValueError("Shapes %s and %s are not compatible" %
--> 579 (self, other))
580
ValueError: Shapes (?, ?) and () are not compatible
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-6-a4001289be07> in <module>()
61
62 model = Model(input = [query, pos_doc] + neg_docs, output = prob)
---> 63 model.compile(optimizer = "adadelta", loss = "binary_crossentropy")
/Users/jun/anaconda/lib/python3.5/site-packages/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, **kwargs)
628 loss_weight = loss_weights_list[i]
629 output_loss = weighted_loss(y_true, y_pred,
--> 630 sample_weight, mask)
631 if len(self.outputs) > 1:
632 self.metrics_tensors.append(output_loss)
/Users/jun/anaconda/lib/python3.5/site-packages/keras/engine/training.py in weighted(y_true, y_pred, weights, mask)
330 def weighted(y_true, y_pred, weights, mask=None):
331 # score_array has ndim >= 2
--> 332 score_array = fn(y_true, y_pred)
333 if mask is not None:
334 # Cast the mask to floatX to avoid float64 upcasting in theano
/Users/jun/anaconda/lib/python3.5/site-packages/keras/objectives.py in binary_crossentropy(y_true, y_pred)
46
47 def binary_crossentropy(y_true, y_pred):
---> 48 return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
49
50
/Users/jun/anaconda/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in binary_crossentropy(output, target, from_logits)
1464 output = tf.clip_by_value(output, epsilon, 1 - epsilon)
1465 output = tf.log(output / (1 - output))
-> 1466 return tf.nn.sigmoid_cross_entropy_with_logits(output, target)
1467
1468
/Users/jun/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/nn.py in sigmoid_cross_entropy_with_logits(logits, targets, name)
432 except ValueError:
433 raise ValueError("logits and targets must have the same shape (%s vs %s)"
--> 434 % (logits.get_shape(), targets.get_shape()))
435
436 # The logistic loss formula from above is
ValueError: logits and targets must have the same shape (() vs (?, ?))
答案 0 :(得分:0)
(在@ dga&#39; s请求的评论转换为答案)
我问过我们的居民Keras专家,他的回答是:查看错误消息和代码,可以推断最终输出概率是标量,而它应该是2D数组(每批次输入一个标量概率) 。这个问题很可能就行了
prob = Lambda(lambda x: x[0][0] / backend.sum(x[0]), output_shape = (1, ))(exponentiated)