So I have an AlexNet-style CNN for regression. While training, the training loss is low, but the loss on the test images is huge. I assumed this was simple overfitting, but when I run the trained model on the training images, I get the same huge loss, even though these are images it was trained on. Maybe I'm mistaken, but shouldn't the loss when predicting on the training images be roughly the same as the loss reported when training finished? The other odd thing is that the predicted outputs are all very close to each other.
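To make the comparison I have in mind concrete, here is a minimal sketch of the check (it only reuses model, history, train_images, and train_labels from the training script further down, nothing new):

# Sketch: compare the loss from the last epoch of fit() with the loss obtained
# by re-evaluating the trained model on the very same training images.
import numpy as np

final_train_loss = history.history['loss'][-1]
scores = model.evaluate(np.asarray(train_images), np.asarray(train_labels), verbose=0)
reeval_loss = scores[0]  # first entry is the compiled loss (MSE here)

print("loss at end of training: %.4f" % final_train_loss)
print("loss re-evaluated on training images: %.4f" % reeval_loss)  # I expected these to match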
Here are the two possible culprits: the code I use for training and testing, and below it the code that re-tests the model from the .ckpt checkpoint.
from __future__ import absolute_import, division, print_function
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
from tensorflow import keras
from tensorflow.keras import backend as K  # imported but not used below
from tensorflow.keras.utils import plot_model
import numpy as np
import matplotlib.pyplot as plt
import datetime
(train_images, train_labels), (test_images, test_labels) = np.load("dataset.npy", allow_pickle=True)

# Scale pixel values to [0, 1] and divide the labels by 10 (they are multiplied back by 10 when printed).
train_images = train_images / 255
test_images = test_images / 255
train_labels = list(map(float, train_labels))
test_labels = list(map(float, test_labels))
train_labels = [i / 10 for i in train_labels]
test_labels = [i / 10 for i in test_labels]
start_time = datetime.datetime.now()
model = keras.Sequential([
    keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=(128, 128, 3), padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Conv2D(filters=256, kernel_size=(11, 11), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(1000, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(1)
])
plot_model(model, to_file='model.png')
# Per-label weights (keys are the scaled label values) passed to fit() via class_weight.
class_weights = {.05: 93.0,
                 .10: 31.0,
                 .15: 93.0,
                 .20: 18.6,
                 .25: 46.5,
                 .30: 4.894736842105263,
                 .35: 7.75,
                 .40: 4.043478260869565,
                 .45: 2.90625,
                 .50: 2.2142857142857144,
                 .55: 2.066666666666667,
                 .60: 1.5,
                 .65: 1.453125,
                 .70: 1.0,
                 .75: 1.273972602739726,
                 .80: 1.6607142857142858,
                 .85: 3.72,
                 .90: 6.642857142857143,
                 .95: 15.5,
                 1.0: 1}
model.compile(loss='mean_squared_error',
              optimizer=keras.optimizers.SGD(0.01),
              metrics=['mean_squared_error'],
              weighted_metrics=['mean_squared_error'])
train_images = train_images.reshape(462, 128, 128, 3)
test_images = test_images.reshape(116, 128, 128, 3)

# ModelCheckpoint monitors the training mean squared error (no validation data is passed to fit).
history = model.fit(train_images, train_labels, epochs=200,
                    callbacks=[keras.callbacks.ModelCheckpoint("./model.ckpt", monitor='mean_squared_error', save_best_only=True, verbose=1)],
                    class_weight=class_weights)
# Plot the training loss values
plt.plot(history.history['loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train'], loc='upper left')  # only the training loss is available (no validation split)
plt.ylim(0, 0.5)
plt.show()
# Reload the best checkpointed weights and check the predictions on the test images.
model.load_weights("./model.ckpt")
predictions = model.predict(test_images)
totalDifference = 0
for i in range(116):
    print("%s: %s" % (test_labels[i] * 10, predictions[i] * 10))
    totalDifference += abs(test_labels[i] - predictions[i])
# 116 samples / 10 = 11.6, so this is the mean absolute difference on the original label scale.
avgDifference = totalDifference / 11.6
print("\n%s\n" % avgDifference)
print("Time Elapsed:")
print(datetime.datetime.now() - start_time)
Testing by loading the weights from the .ckpt file:
from __future__ import absolute_import, division, print_function
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
from tensorflow import keras
import numpy as np
(train_images, train_labels), (test_images, test_labels) = np.load("dataset.npy", allow_pickle=True)
train_images = train_images / 255
test_images = test_images / 255
train_labels = list(map(float, train_labels))
test_labels = list(map(float, test_labels))
train_labels = [i/10 for i in train_labels]
test_labels = [i/10 for i in test_labels]
model = keras.Sequential([
    keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=(128, 128, 3), padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Conv2D(filters=256, kernel_size=(11, 11), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(1000, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(1)
])
model.compile(loss='mean_squared_error',
              optimizer=keras.optimizers.SGD(0.01),
              metrics=['mean_squared_error'],
              weighted_metrics=['mean_squared_error'])
train_images = train_images.reshape(462, 128, 128, 3)
test_images = test_images.reshape(116, 128, 128, 3)
model.load_weights("./model.ckpt")
# Check the model on the training images (only the first 116 of the 462 are inspected here).
train_pred = model.predict(train_images)
totalDifference = 0
for i in range(116):
    print("%s: %s" % (train_labels[i] * 10, train_pred[i] * 10))
    totalDifference += abs(train_labels[i] - train_pred[i])
avgDifference = totalDifference / 11.6  # mean absolute difference on the original label scale
print("\n%s\n" % avgDifference)
# Same check on the test images.
predictions = model.predict(test_images)
totalDifference = 0
for i in range(116):
    print("%s: %s" % (test_labels[i] * 10, predictions[i] * 10))
    totalDifference += abs(test_labels[i] - predictions[i])
avgDifference = totalDifference / 11.6
print("\n%s\n" % avgDifference)
Any ideas would be appreciated, thanks.
Answer 0 (score: 0)
I'm not sure, but on the line
history = model.fit(train_images,
                    train_labels,
                    epochs=200,
                    callbacks=[keras.callbacks.ModelCheckpoint("./model.ckpt", monitor='mean_squared_error', save_best_only=True, verbose=1)],
                    class_weight=class_weights)
you may be saving the model with the largest mean squared error, so pass mode='min' as an argument to make sure that the model with the smallest mean squared error is the one that gets saved.
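A minimal sketch of that change, reusing the same file path, monitored metric, and fit() arguments from the question (only the mode argument is new):

# Sketch: the same checkpoint callback, but with an explicit mode='min' so that
# "best" means the smallest monitored mean squared error.
checkpoint = keras.callbacks.ModelCheckpoint("./model.ckpt",
                                             monitor='mean_squared_error',
                                             save_best_only=True,
                                             mode='min',  # smaller monitored value counts as better
                                             verbose=1)

history = model.fit(train_images,
                    train_labels,
                    epochs=200,
                    callbacks=[checkpoint],
                    class_weight=class_weights)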