Question

我正在构建一个 BiLSTM-Attention-Conv1D 结构的模型（我想使用多个具有不同卷积核大小的 Conv1D 层），但遇到了注意力层与 Conv1D 层之间形状不兼容的问题。我尝试过使用 Reshape 层，但没有成功。以下是我的代码：

我的模型如下


sequence_input = Input(shape=(maxlen,), dtype="int32")  # int32 input sequence of length maxlen
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)
# Pipeline: embedding -> two stacked BiLSTMs -> attention -> multi-kernel Conv1D -> pooling -> Dense.
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences = True), name="bi_lstm_0")(embedded_sequences)

# Getting our LSTM outputs
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True),
                                                                     name="bi_lstm_1")(lstm)
# Join the forward and backward states returned by bi_lstm_1.
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])  # NOTE(review): state_c is never used below
# NOTE(review): Attention is not defined in this snippet; presumably a custom layer built with 10 units -- confirm.
context_vector, attention_weights = Attention(10)(lstm, state_h)
# NOTE(review): the ValueError in the traceback is raised on the next line: input_shape = [256] vs. target [2500, 100, 1].
x = Reshape((maxlen, output_dim, 1))(context_vector)
# Intended: one Conv1D branch (128 filters, ReLU) per entry of kernel_sizes, all applied to x.
# NOTE(review): range(len(kernel_sizes)) yields 0..4, not the listed sizes 1..5 -- iterate kernel_sizes directly?
kernel_sizes = [1,2,3,4,5]
convs = []
for kernel_size in range(len(kernel_sizes)):
    conv = Conv1D(128, kernel_size, activation='relu')(x)
    convs.append(conv)
    
avg_pool = GlobalAveragePooling1D()(convs)  # NOTE(review): convs is a Python list of tensors, not one tensor -- confirm the pooling layers accept it
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])
output = Dense(50, activation="sigmoid")(conc)  # 50-unit sigmoid output layer
# Wrap the graph in a functional model and show its layer summary.
model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())

我的代码给了我以下错误：

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
     13 context_vector, attention_weights = Attention(10)(lstm, state_h)
     14 
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
     16 
     17 

6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    950     if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
    951       return self._functional_construction_call(inputs, args, kwargs,
--> 952                                                 input_list)
    953 
    954     # Maintains info about the `Layer.call` stack.

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
   1089         # Check input assumptions set after layer building, e.g. input shape.
   1090         outputs = self._keras_tensor_symbolic_call(
-> 1091             inputs, input_masks, args, kwargs)
   1092 
   1093         if outputs is None:

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
    820       return nest.map_structure(keras_tensor.KerasTensor, output_signature)
    821     else:
--> 822       return self._infer_output_signature(inputs, args, kwargs, input_masks)
    823 
    824   def _infer_output_signature(self, inputs, args, kwargs, input_masks):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
    861           # TODO(kaftan): do we maybe_build here, or have we already done it?
    862           self._maybe_build(inputs)
--> 863           outputs = call_fn(inputs, *args, **kwargs)
    864 
    865         self._handle_activity_regularization(inputs, outputs)

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
    555       # Set the static shape for the result since it might lost during array_ops
    556       # reshape, eg, some `None` dim in the result could be inferred.
--> 557       result.set_shape(self.compute_output_shape(inputs.shape))
    558     return result
    559 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
    546       output_shape = [input_shape[0]]
    547       output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548                                                   self.target_shape)
    549     return tensor_shape.TensorShape(output_shape)
    550 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
    534       output_shape[unknown] = original // known
    535     elif original != known:
--> 536       raise ValueError(msg)
    537     return output_shape
    538 

ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]

请帮助我

Keras 中注意力层与 Conv1D 层之间的兼容性

0 个答案: