我使用 tensorflow_recommenders 来测试两个塔模型,但收到以下警告:
<块引用>tensorflow:Model 是用形状 (None, None) 构建的,用于输入 KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='item_embed_input'), name='item_embed_input', description="created by layer 'item_embed_input'"),但它被称为 在形状不兼容的输入上 (None,)
源代码:
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs
n_unique_users = 100
n_unique_items = 50
n_records = 1000
def test_twotower(ratings, items):
model = TwoTowerTensorFlowModel(n_unique_users, n_unique_items, items.batch(128))
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
train_size = int(n_records * 0.8)
train = ratings.take(train_size).shuffle(n_records).batch(100)
test = ratings.skip(train_size).take(n_records - train_size).shuffle(n_records).batch(50)
model.fit(train, epochs=3)
class TwoTowerTensorFlowModel(tfrs.Model):
def __init__(self, n_unique_users, n_unique_items, items):
super().__init__()
embedding_dim = 32
print("unique size: user {}, item {}".format(n_unique_users, n_unique_items))
self.user_model = tf.keras.Sequential([tf.keras.layers.Embedding(n_unique_users, embedding_dim, name="user_embed")])
self.item_model = tf.keras.Sequential([tf.keras.layers.Embedding(n_unique_items, embedding_dim, name="item_embed")])
self.task = tfrs.tasks.Retrieval(
metrics=tfrs.metrics.FactorizedTopK(
candidates=items.map(self.item_model)
)
)
def compute_loss(self, features, training=False):
user_embeddings = self.user_model(features[0])
item_embeddings = self.item_model(features[1])
return self.task(user_embeddings, item_embeddings)
if '__main__' == __name__:
rating_users = np.random.randint(n_unique_users, size=(n_records,))
rating_items = np.random.randint(n_unique_items, size=(n_records,))
item_ids = np.arange(0, n_unique_items)
ratings = tf.data.Dataset.from_tensor_slices((rating_users, rating_items))
items = tf.data.Dataset.from_tensor_slices(item_ids)
# two tower model
test_twotower(ratings, items)
参数'features'被发送到TwoTowerTensorFlowModel.comput_loss。
features[0] 具有形状 (None,),但嵌入层需要 (None, None)。
第一个 None 表示批量大小,但我在设置训练数据集时已经完成了批量操作。那么如何解决问题呢?