I am using the following versions:
tf version: 2.3.0
keras version: 2.4.0
My custom loss function is:
def mse_fn(y_true, y_pred):
    error = y_true - y_pred
    mserror = tf.reduce_mean(tf.square(error))
    return mserror
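As a sanity check (toy numbers of my own, assuming the usual imports of numpy as np, tensorflow as tf and tensorflow.keras as keras), the function agrees with Keras' built-in MSE on a small example:

# Quick comparison of my loss against keras.losses.mean_squared_error
y_true_toy = tf.constant([1.0, 2.0, 3.0])
y_pred_toy = tf.constant([1.5, 2.0, 2.0])
print(mse_fn(y_true_toy, y_pred_toy).numpy())                            # 0.41666667
print(keras.losses.mean_squared_error(y_true_toy, y_pred_toy).numpy())   # 0.41666667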
The data preprocessing in my code is:
(X_train, Y_train), (X_test, Y_test) = keras.datasets.mnist.load_data()
Y_train = np.array(Y_train).astype(np.float32)
Y_test = np.array(Y_test).astype(np.float32)
X_train = X_train/255.
X_test = X_test/255.
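Just to document what actually goes into the model (a quick check of my own, not part of the pipeline):

# Shapes/dtypes after the preprocessing above
print(X_train.shape, X_train.dtype)   # (60000, 28, 28) float64
print(Y_train.shape, Y_train.dtype)   # (60000,) float32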
My custom DNN layer is:
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        self.kernel = self.add_weight(name="kernel", shape=[batch_input_shape[-1], self.units], initializer="glorot_normal")
        self.bias = self.add_weight(name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape)

    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "units": self.units, "activation": keras.activations.serialize(self.activation)}
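On its own the layer behaves as I expect; for example this quick check with a dummy input (my own sketch, not used in the training code):

# Dummy batch of 4 flattened 28x28 images to exercise the layer
dummy = tf.random.uniform((4, 784))
layer = MyDense(32, activation='relu')
print(layer(dummy).shape)                    # (4, 32)
print(layer.kernel.shape, layer.bias.shape)  # (784, 32) (32,)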
The model is:
class DNN_model(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Flatten(input_shape=[28, 28])
        self.hidden2 = MyDense(32, activation='relu')
        self.hidden3 = MyDense(16, activation='relu')
        self.hidden4 = MyDense(8, activation='relu')
        self.hidden5 = MyDense(output_dim, activation='relu')

    def call(self, inputs):
        Z = self.hidden1(inputs)
        Z = self.hidden2(Z)
        Z = self.hidden3(Z)
        Z = self.hidden4(Z)
        Z = self.hidden5(Z)
        return Z
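The model builds and runs on a dummy MNIST-shaped batch (again just my own check; tmp_model is not used anywhere else):

# Build the model once on a dummy batch and inspect it
tmp_model = DNN_model(1)
print(tmp_model(tf.zeros((2, 28, 28))).shape)  # (2, 1)
tmp_model.summary()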
When I compile and fit with model.fit, I get the following loss values:
model_cust_loss = DNN_model(1)
model_cust_loss.compile(optimizer='adam', loss = mse_fn, metrics=['accuracy'])
model_cust_loss.fit(X_train, Y_train, epochs=20 , batch_size=32)
Output:
1875/1875 [==============================] - 3s 1ms/step - loss: 2.5784 - accuracy: 0.1690
Epoch 2/20
1875/1875 [==============================] - 3s 1ms/step - loss: 1.0625 - accuracy: 0.1910
Epoch 3/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.8146 - accuracy: 0.1947
Epoch 4/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.7062 - accuracy: 0.1981
Epoch 5/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.6342 - accuracy: 0.1999
Epoch 6/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.5785 - accuracy: 0.2014
Epoch 7/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.5263 - accuracy: 0.2015
Epoch 8/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.4914 - accuracy: 0.2036
Epoch 9/20
1875/1875 [==============================] - 3s 1ms/step - loss: 0.4480 - accuracy: 0.2066
Epoch 10/20
...
Then I tried a custom training loop:
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

n_epochs = 5
batch_size = 32
n_steps = len(X_train)//batch_size
optimizer = keras.optimizers.Adam()
print("n_steps: ", n_steps)
epoch_losses = []
for epoch in range(1, n_epochs+1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    batch_losses = []
    for step in range(1, n_steps+1):
        #print("step:", step)
        X_batch, Y_batch = random_batch(X_train, Y_train, batch_size=32)
        with tf.GradientTape() as tape:
            Y_pred = model_cust_loss(X_batch, training=True)
            current_loss = mse_fn(Y_batch, Y_pred)
        batch_losses.append(current_loss)
        gradients = tape.gradient(current_loss, model_cust_loss.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model_cust_loss.trainable_variables))
    epoch_losses.append(np.mean(batch_losses))
    print("loss per epoch:", np.mean(batch_losses))
The loss values in the output differ from the model.fit run:
Epoch 1/5
loss per epoch: 8.3357525
Epoch 2/5
loss per epoch: 8.387993
Epoch 3/5
loss per epoch: 8.356688
Epoch 4/5
loss per epoch: 8.362662
Epoch 5/5
loss per epoch: 8.374953
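For reference, this is how I would rewrite the loop to track the per-epoch loss with a running-average metric, which is how I understand the fit progress bar reports it (a sketch only; loss_tracker is my own name, and I have not run this variant):

# Sketch: track the loss with keras.metrics.Mean instead of a Python list
loss_tracker = keras.metrics.Mean(name="epoch_loss")
for epoch in range(1, n_epochs + 1):
    loss_tracker.reset_states()
    for step in range(1, n_steps + 1):
        # Note: random_batch samples indices with replacement, so one "epoch"
        # here is not an exact pass over every training example as in fit.
        X_batch, Y_batch = random_batch(X_train, Y_train, batch_size=batch_size)
        with tf.GradientTape() as tape:
            Y_pred = model_cust_loss(X_batch, training=True)
            current_loss = mse_fn(Y_batch, Y_pred)
        gradients = tape.gradient(current_loss, model_cust_loss.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model_cust_loss.trainable_variables))
        loss_tracker.update_state(current_loss)
    print("Epoch {}/{} - loss: {:.4f}".format(epoch, n_epochs, loss_tracker.result().numpy()))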
Can you help?
Thanks, Debotan