试图实施this文章。
Edit1 :发现一个错误,我的输出大小是10,而不是1。(每个句子一个数字,每个文档有10个句子)
Edit2 :我又遇到了一个错误,该错误涉及批处理大小。当我将其设置为10时,模型会训练(!!!!)。但我认为这是不正确的方法...我给批次大小3的错误是
编辑3 已解决!大小的东西+ BIDIRECTIONAL返回的东西与LSTM不同,因此我需要自我介绍。将正确的代码放在答案中。
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
[[metrics_8/acc/Mean_1/_5481]]
(1) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
0 successful operations.
0 derived errors ignored.
目标是提取文档摘要。
Link与代码协作
他们所做的是(您可以在第3页的图片中看到)
文档的每个句子100个BI_LSTM +注意。
合并这些文件并将其插入1 BI_LSTM + Attention以获取文档嵌入。
使用LSTM中的文档嵌入和隐藏状态来获取某些功能。
根据功能分类
在与keras低级api进行了很多斗争之后,我得到了一个简单的版本。 我要做的是获取已经嵌入的句子,然后执行最后一个LSTM。 或者将单词嵌入句子中,并使句子的小单元LSTM起作用。
现在我正试图将所有东西放在一起,但无法容纳合适的尺寸。
我的输入大小是
文档数*句子文档数*单词句数*单词嵌入 在代码中,我将其设置为20 * 10 * 50 * 100 (文档中的10句话表示,现在所有内容都可以更快地运行。)。
我的输出是
10 * 1表示每个句子,如果它是文档摘要的一部分,我将得到1/0。 (我尚未进行特征提取部分,我只是使用另一个密集层来给我概率。)
我认为问题出在这部分代码
X_doc = Lambda(lambda x: x[:,t, :, :])(X)
带有示例数据的代码
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import keras
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow as tf
from keras import backend as K
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
return_sequences=False, name='bidirectional_' + str(i)), merge_mode='concat'))
densors_1.append(Dense(densor1_output_size, activation = "tanh"))
densors_2.append(Dense(densor2_output_size, activation = "softmax"))
def invoke_sentence(sentence_matrix, index):
if index==0:
print(type(sentence_matrix))
print(tf.shape(sentence_matrix))
Ys = bi_lstms[index](sentence_matrix)
attention_middle = densors_1[index](Ys)
output = densors_2[index](attention_middle)
if index==0:
print(f'Ys shape is {Ys.shape}')
print(f'attention_middle shape is {attention_middle.shape}')
print(f'output shape is {output.shape}')
return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
"""
Arguments:
words_in_sentence -- Tx -- length of the input sequence - max words per sentence
sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
lstm_hidden_size -- hidden state size of the Bi-LSTM
word_embeddings_size -- size of the word embeddings
model_output_size -- size of each sentence label (1 or 0)
Returns:
model -- Keras model instance
"""
sentences_embeddings = []
X = Input(shape=(sentences_in_doc , words_in_sentence, word_embeddings_size), name= 'X')
for t in range(Ty):
X_doc = Lambda(lambda x: x[:,t, :, :])(X)
print(type(X_doc))
print(X_doc)
print(X_doc.shape)
sentences_embeddings.append(invoke_sentence(X_doc, t))
sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=0))(sentences_embeddings)
Ys = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc , lstm_hidden_size*2),
return_sequences=False, name='bidirectional_document'),
merge_mode='concat')(sentences_embeddings_stacked)
attention_middle = Dense(densor1_output_size, activation = "tanh")(Ys)
document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)
outputs = Dense(model_output_size, activation = "softmax")(document_embedding)
# compute_features(document_embeddings, sentences_embeddings, ys)
model = Model(inputs=X, outputs=outputs)
return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=2, epochs=1)
答案 0 :(得分:0)
# Sizes
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
# Data
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
return_sequences=True, return_state=True, name='bidirectional_' + str(i))))
densors_1.append(Dense(densor1_output_size, activation = "tanh",name='senteence_dense_tanh' + str(i)))
densors_2.append(Dense(densor2_output_size, activation = "softmax",name='senteence_dense_softmax' + str(i)))
def invoke_sentence(sentence_matrix, index):
if index==0:
print(type(sentence_matrix))
print(tf.shape(sentence_matrix))
lstm, forward_h, forward_c, backward_h, backward_c = bi_lstms[index](sentence_matrix)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
attention_middle = densors_1[index](state_h)
output = densors_2[index](attention_middle)
if index==0:
print(f'lstm shape is {lstm.shape}')
print(f'state_h shape is {state_h.shape}')
print(f'state_c shape is {state_c.shape}')
print(f'attention_middle shape is {attention_middle.shape}')
print(f'output shape is {output.shape}')
return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
"""
Arguments:
words_in_sentence -- Tx -- length of the input sequence - max words per sentence
sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
lstm_hidden_size -- hidden state size of the Bi-LSTM
word_embeddings_size -- size of the word embeddings
model_output_size -- size of each sentence label (1 or 0)
Returns:
model -- Keras model instance
"""
sentences_embeddings = []
X = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size), name= 'X')
for t in range(sentences_in_doc):
X_doc = Lambda(lambda x: x[:, t,:, :])(X)
if(t==0):
print("X_doc")
print(type(X_doc))
print(X_doc)
print(X_doc.shape)
sentence_embedding = invoke_sentence(X_doc, t)
sentences_embeddings.append(sentence_embedding)
if(t==0):
print("sentence_embedding")
print(type(sentence_embedding))
print(sentence_embedding)
print(sentence_embedding.shape)
sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=1))(sentences_embeddings)
print("sentences_embeddings_stacked")
print(type(sentences_embeddings_stacked))
print(sentences_embeddings_stacked)
print(sentences_embeddings_stacked.shape)
doc_lstm, doc_forward_h, doc_forward_c, doc_backward_h, doc_backward_c = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc, lstm_hidden_size*2),
return_sequences=True, return_state=True, name='bidirectional_document'),
merge_mode='concat')(sentences_embeddings_stacked)
doc_state_h = Concatenate()([doc_forward_h, doc_backward_h])
doc_state_c = Concatenate()([doc_forward_c, doc_backward_c])
print(f'doc_lstm shape is {doc_lstm.shape}')
print(f'doc_state_h shape is {doc_state_h.shape}')
print(f'doc_state_c shape is {doc_state_c.shape}')
attention_middle = Dense(densor1_output_size, activation = "tanh")(doc_state_h)
document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)
print(f'document_embedding shape is {document_embedding.shape}')
# my_layer = MyLayer(input_shape=((400), (10,400), (10,400)), output_dim=2)
# custom_output = my_layer([document_embedding, sentences_embeddings_stacked, doc_state_h])
# print(f'custom_output shape is {custom_output.shape}')
outputs = Dense(model_output_size, activation = "softmax")(document_embedding)
model = Model(inputs=X, outputs=outputs)
return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=5, epochs=1)