我一直在尝试仅使用基础Keras层重新创建一个简单的DNN,并从头开始编写所有内容。一切似乎都正常,但是在训练循环中,我得到了这个错误:
AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'
我尝试将损失函数更改为CategoricalCrossentropy和SparseCategoricalCrossentropy(使用from_logits为True或False),但是错误始终会弹出。
代码如下:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from utils import plot_image, plot_mnist_results, plot_value_array
class Flatten(keras.layers.Layer):
def __init__(self):
super(Flatten, self).__init__()
def build(self, input_shape):
self.output_size = np.prod(input_shape)
def call(self, X):
return tf.reshape(X, shape=(-1, self.output_size))
class Dense(keras.layers.Layer):
def __init__(self, units, activation):
super(Dense, self).__init__()
self.units = units
self.activation = activation
def build(self, input_shape):
self.kernel = self.add_weight(
name='kernel',
dtype=tf.float64,
initializer='glorot_normal',
trainable=True,
shape=(input_shape[-1], self.units)
)
self.bias = self.add_weight(
name='bias',
dtype=tf.float64,
initializer=keras.initializers.Constant(0.1),
trainable=True,
shape=(1, self.units)
)
def call(self, X):
return self.activation(tf.matmul(X, self.kernel) + self.bias)
class DNN(keras.models.Model):
def __init__(self, units, activation):
super(DNN, self).__init__()
self.units = units
self.activation = activation
def build(self, input_shape):
self.flatten = Flatten()
self.hidden_layer = Dense(self.units, tf.nn.relu)
self.output_layer = Dense(10, tf.nn.softmax)
def call(self, X):
print(self.hidden_layer(self.flatten(X)).shape)
print(self.output_layer(self.hidden_layer(self.flatten(X))).shape)
return self.output_layer(self.hidden_layer(self.flatten(X)))
# @tf.function
def train(model, loss, opt, X, y):
with tf.GradientTape() as tape:
gradients = tape.gradient(loss(model(X), y), model.trainable_variables)
gradient_variables = zip(gradients, model.trainable_variables)
opt.apply_gradients(gradient_variables)
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0
model = DNN(units=128, activation=tf.nn.relu)
opt = tf.optimizers.Adam(learning_rate=1e-3)
for epoch in range(3):
for step in range(train_labels.shape[0]):
loss = keras.losses.SparseCategoricalCrossentropy
train(model, loss, opt, train_images[step, :, :], train_labels[step])
train_loss = loss(model(train_images), train_labels)
template = 'Epoch {}, Train loss: {:.5f}'
print(template.format(epoch + 1, train_loss.numpy()))
我希望模型能够成功训练,但事实并非如此。我在做什么错了?
答案 0 :(得分:0)
从给定的代码中,我可以看到您在如下所示的位置混合使用了tf和keras。
opt = tf.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.SparseCategoricalCrossentropy
这可能会引起类似的问题。对于TensorFlow 2.0,您可以在直接使用keras的所有地方统一使用tf.keras。
我也可以发现,您正在实例化批处理循环中的损失对象。这是不正确的。您必须在启动纪元循环的顶部实例化。
一切似乎都很好。希望这会有所帮助!