Question

我一直在尝试仅使用基础Keras层重新创建一个简单的DNN，并从头开始编写所有内容。一切似乎都正常，但是在训练循环中，我得到了这个错误： AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'

我尝试将损失函数更改为CategoricalCrossentropy和SparseCategoricalCrossentropy（使用from_logits为True或False），但是错误始终会弹出。

代码如下：

import numpy as np
import tensorflow as tf
from tensorflow import keras

from utils import plot_image, plot_mnist_results, plot_value_array


class Flatten(keras.layers.Layer):
    def __init__(self):
        super(Flatten, self).__init__()

    def build(self, input_shape):
        self.output_size = np.prod(input_shape)

    def call(self, X):
        return tf.reshape(X, shape=(-1, self.output_size))


class Dense(keras.layers.Layer):
    def __init__(self, units, activation):
        super(Dense, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.kernel = self.add_weight(
            name='kernel',
            dtype=tf.float64,
            initializer='glorot_normal',
            trainable=True,
            shape=(input_shape[-1], self.units)
        )
        self.bias = self.add_weight(
            name='bias',
            dtype=tf.float64,
            initializer=keras.initializers.Constant(0.1),
            trainable=True,
            shape=(1, self.units)
        )

    def call(self, X):
        return self.activation(tf.matmul(X, self.kernel) + self.bias)


class DNN(keras.models.Model):
    def __init__(self, units, activation):
        super(DNN, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.flatten = Flatten()
        self.hidden_layer = Dense(self.units, tf.nn.relu)
        self.output_layer = Dense(10, tf.nn.softmax)

    def call(self, X):
        print(self.hidden_layer(self.flatten(X)).shape)
        print(self.output_layer(self.hidden_layer(self.flatten(X))).shape)
        return self.output_layer(self.hidden_layer(self.flatten(X)))


# @tf.function
def train(model, loss, opt, X, y):
    with tf.GradientTape() as tape:
        gradients = tape.gradient(loss(model(X), y), model.trainable_variables)
        gradient_variables = zip(gradients, model.trainable_variables)
        opt.apply_gradients(gradient_variables)


mnist = keras.datasets.mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images / 255.0
test_images = test_images / 255.0

model = DNN(units=128, activation=tf.nn.relu)
opt = tf.optimizers.Adam(learning_rate=1e-3)

for epoch in range(3):

    for step in range(train_labels.shape[0]):
        loss = keras.losses.SparseCategoricalCrossentropy
        train(model, loss, opt, train_images[step, :, :], train_labels[step])

    train_loss = loss(model(train_images), train_labels)

    template = 'Epoch {}, Train loss: {:.5f}'
    print(template.format(epoch + 1, train_loss.numpy()))

我希望模型能够成功训练，但事实并非如此。我在做什么错了？

Answer 1

从给定的代码中，我可以看到您在如下所示的位置混合使用了tf和keras。

opt = tf.optimizers.Adam(learning_rate=1e-3)

loss = keras.losses.SparseCategoricalCrossentropy

这可能会引起类似的问题。对于TensorFlow 2.0，您可以在直接使用keras的所有地方统一使用tf.keras。

我也可以发现，您正在实例化批处理循环中的损失对象。这是不正确的。您必须在启动纪元循环的顶部实例化。

一切似乎都很好。希望这会有所帮助！

AttributeError：“ SparseCategoricalCrossentropy”对象没有属性“ _id”

1 个答案: