背景:使用抽取式(extractive)方法进行文本摘要。
我正在关注的文章-link。
编辑1 链接到colab
我网络中的最后一层使用从多个输入中提取的特征进行分类。
输入 :(?表示批处理大小)
d = document_embeddings形状=(?,400)
s =句子嵌入的形状=(?,10,400)
(说明-每个文档10个句子)
h_state = 生成 document_embeddings 的 LSTM 的 h_state,形状为 (?,10,400)(说明:LSTM 的时间步数为 10,对应每个文档中的 10 个句子;隐藏状态大小为 400)
输出:
在最后一层中,我使用这些输入来计算以下特征:
C_j = Wc * s_j
M_j = s_j.T * W_s * d
N_j = s_j.T * W_r * tanh(O_j)
P_j = W_p * h_state
O_j 是文档到目前为止的摘要表示:将此前每个句子的嵌入乘以该句子出现在摘要中的概率,再把这些乘积相加得到。
for i in range(j-1):
sum += S_i * prob_in_summary(S_i)
句子i的prob_in_summary计算公式为:
sigmoid(C_i + M_i - N_i + P_i + b)
现在。使整个模型最小化的损失函数是观察到的标签的负对数似然(伪代码)
loss(Weights, bias) = -(
    for doc..
        for sentence..
            sent_label * log(prob(sent_label == 1 | S_emb, O_j, D_emb)) +
            (1 - sent_label) * log(1 - prob(sent_label == 1 | S_emb, O_j, D_emb))
)
我的问题是:
到目前为止,我的代码:
自定义层:
class MyLayer(Layer):
    """Work-in-progress scoring layer over three inputs.

    The layer is called with a list ``[document_embedding,
    sentences_embeddings_stacked, state_h]``.  At this stage only the
    "content richness" term (``W_c * sentences``) is returned; the other
    terms are computed solely so their shapes can be printed while the
    layer is being developed.

    Args:
        output_dim: Stored on the instance; not used by ``call`` yet.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable weights (each a single broadcast scalar)."""
        assert isinstance(input_shape, list)
        self.W_c = self.add_weight(name='W_c', shape=(1,),
                                   initializer='uniform', trainable=True)
        self.W_s = self.add_weight(name='W_s', shape=(1,),
                                   initializer='uniform', trainable=True)
        # TODO: not wired in yet.
        # self.W_r = self.add_weight(name='W_r', shape=(1,), initializer='uniform',trainable=True)
        self.W_p = self.add_weight(name='W_p', shape=(1,),
                                   initializer='uniform', trainable=True)
        # TODO: not wired in yet.
        # self.bias = self.add_weight(name='bias', shape=(1,), initializer='uniform',trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """Return ``W_c * sentences_embeddings_stacked``.

        Args:
            x: List of three tensors
                ``[document_embedding, sentences_embeddings_stacked, state_h]``.

        Returns:
            The content-richness tensor.  ``W_c`` has shape ``(1,)``, so the
            product broadcasts and keeps the shape of
            ``sentences_embeddings_stacked``.
        """
        assert isinstance(x, list)
        document_embedding, sentences_embeddings_stacked, state_h = x

        content_richness = self.W_c * sentences_embeddings_stacked
        print("content_richness", content_richness.shape)
        print("sentences_embeddings_stacked", sentences_embeddings_stacked.shape)
        print("document_embedding", document_embedding.shape)
        print("document_embedding_repeat", K.repeat(document_embedding, 10).shape)

        # TODO transpose, * K.repeat(document_embedding, 10)
        novelty = sentences_embeddings_stacked * self.W_s
        print("novelty", novelty.shape)

        print("state_h", state_h.shape)
        position = self.W_p * state_h
        print("position", position.shape)

        return content_richness

    def compute_output_shape(self, input_shape):
        """Report the shape of the tensor that ``call`` actually returns.

        ``call`` returns one tensor shaped like the second input, so a single
        shape is reported (the previous version reported two shapes for one
        output tensor).

        TODO: once the layer emits one score per sentence, this should become
        ``(batch, sentences)`` or ``(batch, sentences, 1)``.
        """
        assert isinstance(input_shape, list)
        _, sentences_shape, _ = input_shape
        return sentences_shape
自定义损失:
答案 0 :(得分:0)
解决了。必须在自定义层中处理批处理大小(batch size)这一维度,另外还需要做一些堆叠(stack)和拆分(split)操作。
class MyLayer(Layer):
    """Score every sentence of a document and map the scores to classes.

    The layer is called with a list ``[document_embedding,
    sentences_embeddings_stacked, doc_lstm]``.  For each sentence ``j`` it
    combines four element-wise terms -- content (``W_c``), salience with
    respect to the document (``W_s``), novelty with respect to the running
    summary (``W_r``) and position (``W_p``) -- through a sigmoid.  The
    per-sentence results are summed over the sentence axis and projected to
    ``output_dim`` values followed by a softmax.

    NOTE: the salience and novelty terms use element-wise products with
    vector weights; the transposed (bilinear) forms are not implemented.

    Args:
        output_dim: Size of the last axis of the layer output.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable weights.

        Args:
            input_shape: List of the three input shapes, in the same order
                as the tensors passed to ``call``.
        """
        # Feature size is taken from the sentence input rather than
        # hard-coded, so the layer works for any embedding width.
        embedding_dim = int(input_shape[1][-1])

        def feature_weight(name):
            return self.add_weight(name=name,
                                   shape=(embedding_dim,),
                                   initializer='uniform',
                                   trainable=True)

        self.W_p = feature_weight('W_p')
        self.W_c = feature_weight('W_c')
        self.W_s = feature_weight('W_s')
        self.W_r = feature_weight('W_r')

        # Output projection.  Owned by this layer (instead of a Dense layer
        # instantiated inside call) so it is created exactly once and is
        # part of this layer's trainable weights.
        self.W_out = self.add_weight(name='W_out',
                                     shape=(embedding_dim, self.output_dim),
                                     initializer='glorot_uniform',
                                     trainable=True)
        self.b_out = self.add_weight(name='b_out',
                                     shape=(self.output_dim,),
                                     initializer='zeros',
                                     trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    @staticmethod
    def _summary_so_far(sentences, sentences_probs):
        """Return the running summary built from already-scored sentences.

        With no sentence scored yet, the first sentence scaled by 0.5 is
        used; otherwise it is the sum of each earlier sentence multiplied by
        its score.
        """
        if not sentences_probs:
            return sentences[:, 0] * 0.5
        summary = sentences[:, 0] * sentences_probs[0]
        for i in range(1, len(sentences_probs)):
            summary = summary + sentences[:, i] * sentences_probs[i]
        return summary

    def call(self, x):
        """Compute the layer output.

        Args:
            x: List of three tensors ``[document_embedding,
                sentences_embeddings_stacked, doc_lstm]``.  The second and
                third are indexed as ``[:, j]`` per sentence, so axis 1 must
                be the sentence axis with a statically known length.

        Returns:
            Softmax over ``output_dim`` values, one row per batch element.

        Raises:
            ValueError: If the number of sentences is not statically known.
        """
        document_embedding, sentences_embeddings_stacked, doc_lstm = x

        num_sentences = K.int_shape(sentences_embeddings_stacked)[1]
        if num_sentences is None:
            raise ValueError(
                'MyLayer needs a fixed number of sentences per document '
                '(axis 1 of the sentence embeddings).')

        sentences_probs = []
        # Sentences must be scored in order: the novelty term of sentence j
        # depends on the scores of sentences 0..j-1.
        for j in range(num_sentences):
            sentence = sentences_embeddings_stacked[:, j]
            summary = self._summary_so_far(sentences_embeddings_stacked,
                                           sentences_probs)

            content = sentence * self.W_c
            salience = sentence * self.W_s * document_embedding  # missing transpose
            novelty = sentence * self.W_r * K.tanh(summary)  # missing transpose
            position = self.W_p * doc_lstm[:, j]

            # TODO: the bias term of the sigmoid is not included yet.
            sentences_probs.append(
                K.sigmoid(content + salience - novelty + position))

        sentences_probs_stacked = tf.stack(sentences_probs, axis=1)
        pooled = K.sum(sentences_probs_stacked, axis=1)
        logits = K.dot(pooled, self.W_out) + self.b_out
        return K.softmax(logits)

    def compute_output_shape(self, input_shape):
        """Return ``(batch_size, output_dim)``."""
        return input_shape[0][0], self.output_dim