资源
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.version.VERSION)
print(keras.__version__)
#2.5.0
#2.5.0
LSTM Encoder Decoder Model with Attention
n_features = 129
type_max = 3
n_padded_in = 10
n_padded_out = 10
input_item = layers.Input(batch_input_shape=[None, n_padded_in],
name="item_input",
dtype=tf.int64)
input_type = layers.Input(batch_input_shape=[None, n_padded_in],
name="type_input",
dtype=tf.int64)
encoding_padding_mask = tf.math.logical_not(tf.math.equal(input_item, 0))
embedding_item = layers.Embedding(input_dim=n_features,
output_dim=batch_size,
name="item_embedding")(input_item)
embedding_type = layers.Embedding(input_dim=type_max+1,
output_dim=batch_size,
name="rec_embedding")(input_type)
concat_inputs = layers.Concatenate(name="concat_inputs")(
[embedding_item, embedding_type])
concat_inputs = tf.keras.layers.BatchNormalization(
name="batchnorm_inputs")(concat_inputs)
encoder_lstm = layers.LSTM(units=latent_dim,
return_state=True,
name="lstm_encoder")
encoder_output, hidden, cell = encoder_lstm(concat_inputs)
states = [hidden, cell]
decoder_output = hidden
decoder_lstm = layers.LSTM(units=latent_dim,
return_state=True,
name="lstm_decoder")
output_dense = layers.Dense(n_features, name="output")
att = layers.Attention(use_scale=False,
causal=True,
name="attention")
inputs = np.zeros((batch_size, 1, n_features))
all_outputs = []
for _ in range(n_padded_out):
context_vector = att([decoder_output, encoder_output])
context_vector = tf.expand_dims(context_vector, 1)
inputs = tf.cast(inputs, tf.float32)
inputs = tf.concat([context_vector, inputs], axis=-1)
decoder_output, state_h, state_c = decoder_lstm(inputs, initial_state=states)
output = output_dense(decoder_output)
output = tf.expand_dims(output, 1)
all_outputs.append(output)
inputs = output
states = [state_h, state_c]
all_outputs = layers.Lambda(lambda x: tf.concat(x, axis=1))(all_outputs)
type_encoder_model = keras.Model([input_item, input_type],
all_outputs,
name="type_encoder_model")
type_encoder_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
optimizer=keras.optimizers.Adam(learning_rate=l_rate),
metrics=["sparse_categorical_accuracy"])
type_encoder_model.summary()
数据准备
#second input as sequence
type_seq_padded = keras.preprocessing.sequence.pad_sequences(
data["product_type"].to_list(),
maxlen=n_padded_in,
padding="pre",
value=0.0
)
#first input sequence
input_seq_padded = keras.preprocessing.sequence.pad_sequences(
data["input_seq"].to_list(),
maxlen=n_padded_in,
padding="pre",
value=0.0
)
#output sequence
output_seq_padded = keras.preprocessing.sequence.pad_sequences(
data["output_seq"].to_list(),
maxlen=n_padded_out,
padding="pre",
value=0.0
)
数据样本
type_seq_padded
array([[0, 0, 0, ..., 1, 1, 1],
[0, 0, 0, ..., 2, 3, 3],
[0, 0, 0, ..., 3, 3, 3],
...,
[0, 0, 0, ..., 1, 3, 3],
[0, 0, 0, ..., 3, 3, 3],
[0, 0, 0, ..., 3, 3, 3]], dtype=int32)
input_seq_padded
array([[ 0, 0, 0, ..., 101, 58, 123],
[ 0, 0, 0, ..., 79, 95, 87],
[ 0, 0, 0, ..., 98, 109, 123],
...,
[ 0, 0, 0, ..., 123, 109, 98],
[ 0, 0, 0, ..., 109, 98, 123],
[ 0, 0, 0, ..., 95, 123, 95]], dtype=int32)
output_seq_padded
array([[ 0, 0, 0, ..., 58, 123, 43],
[ 0, 0, 0, ..., 95, 87, 123],
[ 0, 0, 0, ..., 109, 123, 10],
...,
[ 0, 0, 0, ..., 109, 98, 123],
[ 0, 0, 0, ..., 98, 123, 43],
[ 0, 0, 0, ..., 123, 95, 95]], dtype=int32)
我的 LSTM 编码器解码器模型将 2 个输入作为序列:项目和项目类型,以及 1 个输出序列:项目。最后一个密集层计算购买 129 种不同商品作为下一个要购买的商品的概率。使用以下代码训练模型:
hist = type_encoder_model.fit([input_seq_padded[:64000],
type_seq_padded[:64000]],
output_seq_padded[:64000],
epochs=1,
batch_size=128,
verbose=1)
当我尝试使用模型进行预测时,代码如下:
y_pred = base_model_X.predict([input_seq_padded_test,
type_seq_padded_test])
测试样本
type_seq_padded_test
array([[0, 0, 0, ..., 2, 3, 3],
[0, 0, 0, ..., 3, 2, 2],
[0, 0, 0, ..., 3, 3, 3],
...,
[0, 0, 0, ..., 3, 2, 1],
[0, 0, 0, ..., 3, 2, 3],
[0, 0, 0, ..., 2, 3, 3]], dtype=int32)
input_seq_padded_test
array([[ 0, 0, 0, ..., 31, 10, 13],
[ 0, 0, 0, ..., 9, 6, 6],
[ 0, 0, 0, ..., 13, 13, 9],
...,
[ 0, 0, 0, ..., 10, 51, 18],
[ 0, 0, 0, ..., 12, 44, 12],
[ 0, 0, 0, ..., 6, 9, 11]], dtype=int32)
我收到如下错误:
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1569 predict_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1559 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1552 run_step **
outputs = model.predict_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1525 predict_step
return self(x, training=False)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:421 call
inputs, training=training, mask=mask)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:556 _run_internal_graph
outputs = node.layer(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1363 _call_wrapper
return self._call_wrapper(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1395 _call_wrapper
result = self.function(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1768 concat
return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_array_ops.py:1228 concat_v2
"ConcatV2", values=values, axis=axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
compute_device)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3565 _create_op_internal
op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2042 __init__
control_input_ops, op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
raise ValueError(str(e))
ValueError: Dimension 0 in both shapes must be equal, but are 32 and 128. Shapes are [32,1] and [128,1]. for '{{node base_model_X/tf.concat_40/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](base_model_X/tf.expand_dims_80/ExpandDims, base_model_X/148834, base_model_X/tf.concat_40/concat/axis)' with input shapes: [32,1,256], [128,1,137], [] and with computed input tensors: input[2] = <-1>.
现在,我正在寻找解决上述下一个错误的方法。我不知道人们怎么想,但我真的开始讨厌编码器-解码器 lstm。预先感谢您的想法或不同的模型配置建议。