Keras: splitting the input across multiple "mini units"

Date: 2019-08-09 18:39:28

Tags: tensorflow keras

Trying to implement this paper.

Edit 1: Found a mistake: my output size is 10, not 1 (one number per sentence, and each document has 10 sentences).

Edit 2: I ran into another error, this time related to the batch size. When I set it to 10, the model trains (!!!!), but I don't think that is the right way to do it... The error I get with batch size 3 is:

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: Incompatible shapes: [10] vs. [3]
     [[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
     [[metrics_8/acc/Mean_1/_5481]]
  (1) Invalid argument: Incompatible shapes: [10] vs. [3]
     [[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
0 successful operations.
0 derived errors ignored.

Edit 3: Solved! It was the shape issue, plus the fact that Bidirectional returns its outputs differently than a plain LSTM, so I had to concatenate the states myself. The working code is in the answer.
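
A minimal sketch of the difference Edit 3 is talking about (layer sizes chosen for illustration, not taken from the original code): a plain LSTM with return_state=True returns three tensors, while Bidirectional returns five, with the forward and backward states kept separate, so they have to be concatenated by hand.

from keras.layers import Input, LSTM, Bidirectional, Concatenate

x = Input(shape=(50, 100))
# Plain LSTM: output sequence, hidden state, cell state
out, h, c = LSTM(200, return_sequences=True, return_state=True)(x)
# Bidirectional: output sequence plus forward/backward hidden and cell states
bi_out, fh, fc, bh, bc = Bidirectional(LSTM(200, return_sequences=True, return_state=True))(x)
state_h = Concatenate()([fh, bh])   # (batch, 400): the combined hidden state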

The goal is to extract a summary of a document (extractive summarization).

Link to a Colab with the code.

What they do (you can see it in the figure on page 3) is:

  1. A Bi-LSTM + attention for every sentence of the document.

  2. Merge those sentence representations and feed them into one Bi-LSTM + attention to get a document embedding.

  3. Use the document embedding and the hidden states of the LSTM to compute some features.

  4. Classify based on the features.

After a lot of fighting with the Keras low-level API, I got a simple version working: either take sentences that are already embedded and run just the final LSTM, or take the word embeddings of one sentence and get the per-sentence "mini unit" LSTM working.

Now I am trying to put everything together, but I can't get the shapes to fit.

My input size is

number of documents * sentences per document * words per sentence * word embedding size. In the code I set it to 20 * 10 * 50 * 100 (10 sentences per document so everything runs faster for now).

My output is

10 * 1, i.e. for every sentence I get a 1/0 indicating whether it is part of the document summary. (I haven't done the feature-extraction part yet; I just use another Dense layer to give me the probabilities.)

I think the problem is in this part of the code:

X_doc = Lambda(lambda x: x[:,t, :, :])(X)
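
That Lambda picks sentence t out of the document tensor; a rough NumPy equivalent (shapes taken from the code below):

import numpy as np
X_np = np.zeros((3, 10, 50, 100))   # (batch, sentences_in_doc, words_in_sentence, word_embeddings_size)
print(X_np[:, 3, :, :].shape)       # (3, 50, 100): one sentence per document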

Code with sample data:

from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import keras
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import tensorflow as tf
from keras import backend as K


num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10

model_output_size = 10
word_embeddings_size = 100 
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400

x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)




# Initialize arrays: one Bi-LSTM plus two Dense layers ("attention") per sentence position
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []

for i in range(sentences_in_doc):
  bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
                                     return_sequences=False,  name='bidirectional_' + str(i)), merge_mode='concat'))
  densors_1.append(Dense(densor1_output_size, activation = "tanh"))
  densors_2.append(Dense(densor2_output_size, activation = "softmax"))



def invoke_sentence(sentence_matrix, index):
  if index==0:
    print(type(sentence_matrix))
    print(tf.shape(sentence_matrix))

  Ys = bi_lstms[index](sentence_matrix)
  attention_middle = densors_1[index](Ys)
  output = densors_2[index](attention_middle)

  if index==0:
    print(f'Ys shape is {Ys.shape}')
    print(f'attention_middle shape is {attention_middle.shape}')
    print(f'output shape is {output.shape}')


  return output




def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
    """
    Arguments:
    words_in_sentence -- Tx -- length of the input sequence - max words per sentence
    sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
    lstm_hidden_size -- hidden state size of the Bi-LSTM
    word_embeddings_size -- size of the word embeddings
    model_output_size -- size of each sentence label (1 or 0)

    Returns:
    model -- Keras model instance
    """
    sentences_embeddings = []

    X = Input(shape=(sentences_in_doc , words_in_sentence, word_embeddings_size), name= 'X')
    for t in range(sentences_in_doc):  # Ty in the docstring: one pass per sentence of the document
      X_doc = Lambda(lambda x: x[:,t, :, :])(X)
      print(type(X_doc))
      print(X_doc)
      print(X_doc.shape)

      sentences_embeddings.append(invoke_sentence(X_doc, t))

    sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=0))(sentences_embeddings)

    Ys = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc , lstm_hidden_size*2),
                            return_sequences=False, name='bidirectional_document'), 
                       merge_mode='concat')(sentences_embeddings_stacked)
    attention_middle = Dense(densor1_output_size, activation = "tanh")(Ys)
    document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)



    outputs = Dense(model_output_size, activation = "softmax")(document_embedding)
    # compute_features(document_embeddings, sentences_embeddings, ys)


    model = Model(inputs=X, outputs=outputs)

    return model



model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)


model.summary()



model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=2, epochs=1)

1 Answer:

Answer 0 (score: 0)
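
Compared with the question code, the fixes are: the per-sentence Bidirectional layers use return_sequences=True and return_state=True, and the forward/backward states are concatenated manually; the sentence embeddings are stacked with tf.stack(..., axis=1) so the sentence dimension sits on the time axis instead of the batch axis; and the loop runs over sentences_in_doc. Stacking on axis=0 is what made the output's leading dimension 10 regardless of the batch size, which explains the Incompatible shapes: [10] vs. [3] error and why batch_size=10 happened to train.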

# Sizes
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10

model_output_size = 10
word_embeddings_size = 100 
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400

# Data
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)

# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []

for i in range(sentences_in_doc):
  bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
                                    return_sequences=True, return_state=True,  name='bidirectional_' + str(i))))
  densors_1.append(Dense(densor1_output_size, activation = "tanh",name='senteence_dense_tanh' + str(i)))
  densors_2.append(Dense(densor2_output_size, activation = "softmax",name='senteence_dense_softmax' + str(i)))



def invoke_sentence(sentence_matrix, index):
  if index==0:
    print(type(sentence_matrix))
    print(tf.shape(sentence_matrix))

  lstm, forward_h, forward_c, backward_h, backward_c = bi_lstms[index](sentence_matrix)
  state_h = Concatenate()([forward_h, backward_h])
  state_c = Concatenate()([forward_c, backward_c])
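  # Note: Bidirectional with return_state=True returns the forward and backward states
  # separately (unlike a plain LSTM), hence the manual Concatenate of the two directions above.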


  attention_middle = densors_1[index](state_h)
  output = densors_2[index](attention_middle)

  if index==0:
    print(f'lstm shape is {lstm.shape}')
    print(f'state_h shape is {state_h.shape}')
    print(f'state_c shape is {state_c.shape}')
    print(f'attention_middle shape is {attention_middle.shape}')
    print(f'output shape is {output.shape}')

  return output

def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
    """
    Arguments:
    words_in_sentence -- Tx -- length of the input sequence - max words per sentence
    sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
    lstm_hidden_size -- hidden state size of the Bi-LSTM
    word_embeddings_size -- size of the word embeddings
    model_output_size -- size of each sentence label (1 or 0)

    Returns:
    model -- Keras model instance
    """
    sentences_embeddings = []

    X = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size), name= 'X')
    for t in range(sentences_in_doc):
      X_doc = Lambda(lambda x: x[:, t,:, :])(X)
      if(t==0):
        print("X_doc")
        print(type(X_doc))
        print(X_doc)
        print(X_doc.shape)

      sentence_embedding = invoke_sentence(X_doc, t)
      sentences_embeddings.append(sentence_embedding)
      if(t==0):
        print("sentence_embedding")
        print(type(sentence_embedding))
        print(sentence_embedding)
        print(sentence_embedding.shape)

    sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=1))(sentences_embeddings)
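    # After the stack: (batch, sentences_in_doc, densor2_output_size) - the sentence
    # dimension is now on the time axis rather than the batch axis.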
    print("sentences_embeddings_stacked")
    print(type(sentences_embeddings_stacked))
    print(sentences_embeddings_stacked)
    print(sentences_embeddings_stacked.shape)

    doc_lstm, doc_forward_h, doc_forward_c, doc_backward_h, doc_backward_c = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc, lstm_hidden_size*2),
                            return_sequences=True, return_state=True, name='bidirectional_document'), 
                       merge_mode='concat')(sentences_embeddings_stacked)
    doc_state_h = Concatenate()([doc_forward_h, doc_backward_h])
    doc_state_c = Concatenate()([doc_forward_c, doc_backward_c])
    print(f'doc_lstm shape is {doc_lstm.shape}')
    print(f'doc_state_h shape is {doc_state_h.shape}')
    print(f'doc_state_c shape is {doc_state_c.shape}')

    attention_middle = Dense(densor1_output_size, activation = "tanh")(doc_state_h)
    document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)
    print(f'document_embedding shape is {document_embedding.shape}')

    # my_layer = MyLayer(input_shape=((400), (10,400), (10,400)), output_dim=2)
    # custom_output = my_layer([document_embedding, sentences_embeddings_stacked, doc_state_h])
    # print(f'custom_output shape is {custom_output.shape}')

    outputs = Dense(model_output_size, activation = "softmax")(document_embedding)           
    model = Model(inputs=X, outputs=outputs)

    return model



model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=5, epochs=1)
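
As a side note (a variation, not the code from the answer above): the per-sentence split can also be expressed with TimeDistributed, which applies one shared sentence encoder to every sentence instead of building a separate Bi-LSTM per sentence position. A rough sketch using the sizes defined above and a sigmoid output per sentence:

from keras.layers import Input, LSTM, Bidirectional, Dense, TimeDistributed
from keras.models import Model

X_alt = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size))
# Shared Bi-LSTM applied to every sentence: (batch, 10, 50, 100) -> (batch, 10, 400)
sentence_embs = TimeDistributed(Bidirectional(LSTM(lstm_hidden_size)))(X_alt)
# Document-level Bi-LSTM over the sentence embeddings: -> (batch, 400)
doc_emb = Bidirectional(LSTM(lstm_hidden_size))(sentence_embs)
# One probability per sentence: -> (batch, 10)
outputs_alt = Dense(sentences_in_doc, activation='sigmoid')(doc_emb)

alt_model = Model(inputs=X_alt, outputs=outputs_alt)
alt_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
alt_model.fit(x_train, y_train, batch_size=5, epochs=1)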