我正在尝试在LSTM网络上尝试本教程。代码如下:
from keras.models import Model
from keras import layers
from keras import Input
text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500
# Our text input is a variable-length sequence of integers.
# Note that we can optionally name our inputs!
text_input = Input(shape=(None,), dtype='int32', name='text')
# Which we embed into a sequence of vectors of size 64
embedded_text = layers.Embedding(64, text_vocabulary_size)(text_input)
# Which we encoded in a single vector via a LSTM
encoded_text = layers.LSTM(32)(embedded_text)
# Same process (with different layer instances) for the question
question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(32, question_vocabulary_size)(question_input)
encoded_question = layers.LSTM(16)(embedded_question)
# We then concatenate the encoded question and encoded text
concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)
# And we add a softmax classifier on top
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)
# At model instantiation, we specify the two inputs and the output:
model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['acc'])
num_samples = 1000
max_length = 100
text = np.random.randint(1, text_vocabulary_size,
size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size,
size=(num_samples, max_length))
answers = np.random.randint(0, 1,
size=(num_samples, answer_vocabulary_size))
model.fit([text, question], answers, epochs=10, batch_size=128)
但是,出现以下错误
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-115-097c6009c2d2> in <module>()
45 size=(num_samples, answer_vocabulary_size))
46
---> 47 model.fit([text, question], answers, epochs=10, batch_size=128)
C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):
C:\ProgramData\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
C:\ProgramData\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: indices[56,0] = 7134 is not in [0, 32)
[[{{node embedding_18/embedding_lookup}}]]
我不明白错误在说什么,也不知道如何解决。
从我阅读的内容中,我认为这意味着我的输入没有正确的尺寸,但实际上并没有弄错什么。我完全按照Francois Challot在第239-240页上的《用Python深度学习》编写的代码进行了复制。因此它应该运行。我正在尝试使用它来理解RNN的工作原理,并更好地了解代码机制,因此,修复此问题的任何帮助都将是很棒的。
注意,我从这个问题中保留的代码中的一些注释: How LSTM deal with variable length sequence
这是model.summary()
的印刷品