I am trying to modify the classification example from the Keras blog so that it classifies images into 3 different classes. I have 3000 training images (3 x 1000) and 1200 validation images (3 x 400), and I modified the code to classify the 3 classes.
The code is as follows:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# dimensions of our images.
img_width, img_height = 150, 150

top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 3000
nb_validation_samples = 1200
epochs = 50
batch_size = 16
n_classes = 3


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'wb'),
            bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'wb'),
            bottleneck_features_validation)


def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
    train_labels = np.array([0] * (nb_train_samples // n_classes) +
                            [1] * (nb_train_samples // n_classes) +
                            [2] * (nb_train_samples // n_classes))

    validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
    validation_labels = np.array([0] * (nb_train_samples // n_classes) +
                                 [1] * (nb_train_samples // n_classes) +
                                 [2] * (nb_train_samples // n_classes))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
When I finally run the two functions:
save_bottlebeck_features()
train_top_model()
the second function returns the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-143-070a6188c611> in <module>()
4 print(validation_labels.shape)
5
----> 6 train_top_model()
<ipython-input-129-ea2b02024693> in train_top_model()
64 loss='categorical_crossentropy', metrics=['accuracy'])
65
---> 66 model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(validation_data, validation_labels))
67 model.save_weights(top_model_weights_path)
~/anaconda/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
865 class_weight=class_weight,
866 sample_weight=sample_weight,
--> 867 initial_epoch=initial_epoch)
868
869 def evaluate(self, x, y, batch_size=32, verbose=1,
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1520 class_weight=class_weight,
1521 check_batch_axis=False,
-> 1522 batch_size=batch_size)
1523 # Prepare validation data.
1524 do_validation = False
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
1380 output_shapes,
1381 check_batch_axis=False,
-> 1382 exception_prefix='target')
1383 sample_weights = _standardize_sample_weights(sample_weight,
1384 self._feed_output_names)
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
142 ' to have shape ' + str(shapes[i]) +
143 ' but got array with shape ' +
--> 144 str(array.shape))
145 return arrays
146
ValueError: Error when checking target: expected dense_58 to have shape (None, 3) but got array with shape (3000, 1)
If I print the shapes of the data and the labels, it returns:
print(train_labels.shape)
(3000, 3)
print(train_data.shape)
(3000, 3)
print(validation_data.shape)
(1200, 4, 4, 512)
print(validation_labels.shape)
(1200,)
EDIT:
I am posting the full code together with the database of images I am working with.
The database can be downloaded here.
The code is as follows:
# dimensions of our images.
img_width, img_height = 150, 150

top_model_weights_path = 'what.h5'  # 'bottleneck_fc_model.h5'
train_data_dir = 'data_short/train'
validation_data_dir = 'data_short/validation'
nb_train_samples = 30
nb_validation_samples = 6
epochs = 50
batch_size = 16
n_classes = 3


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(train_data_dir, target_size=(img_width, img_height),
                                            batch_size=batch_size, class_mode='categorical', shuffle=False)
    bottleneck_features_train = model.predict_generator(generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'wb'), bottleneck_features_train)

    generator = datagen.flow_from_directory(validation_data_dir, target_size=(img_width, img_height),
                                            batch_size=batch_size, class_mode='categorical', shuffle=False)
    bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'wb'), bottleneck_features_validation)


def train_top_model():
    encoder = OneHotEncoder()

    # train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
    train_data = np.load('bottleneck_features_train.npy')
    train_labels = np.array([0] * (nb_train_samples // n_classes) +
                            [1] * (nb_train_samples // n_classes) +
                            [2] * (nb_train_samples // n_classes))
    train_labels = to_categorical(train_labels)

    validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
    validation_labels = np.array([0] * (nb_validation_samples // n_classes) +
                                 [1] * (nb_validation_samples // n_classes) +
                                 [2] * (nb_validation_samples // n_classes))
    validation_labels = to_categorical(validation_labels)

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
The error it gives is the following:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-6869607a6e44> in <module>()
----> 1 train_top_model()
<ipython-input-6-933b6592c6c1> in train_top_model()
56 model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
57
---> 58 model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(validation_data, validation_labels))
59 model.save_weights(top_model_weights_path)
~/anaconda/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
861 class_weight=class_weight,
862 sample_weight=sample_weight,
--> 863 initial_epoch=initial_epoch)
864
865 def evaluate(self, x, y, batch_size=32, verbose=1,
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
1356 class_weight=class_weight,
1357 check_batch_axis=False,
-> 1358 batch_size=batch_size)
1359 # Prepare validation data.
1360 if validation_data:
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
1244 for (ref, sw, cw, mode)
1245 in zip(y, sample_weights, class_weights, self._feed_sample_weight_modes)]
-> 1246 _check_array_lengths(x, y, sample_weights)
1247 _check_loss_and_target_compatibility(y,
1248 self._feed_loss_fns,
~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _check_array_lengths(inputs, targets, weights)
235 'the same number of samples as target arrays. '
236 'Found ' + str(list(set_x)[0]) + ' input samples '
--> 237 'and ' + str(list(set_y)[0]) + ' target samples.')
238 if len(set_w) > 1:
239 raise ValueError('All sample_weight arrays should have '
ValueError: Input arrays should have the same number of samples as target arrays. Found 16 input samples and 30 target samples.
EDIT 2, SOLUTION:
I solved the problem by making a radical change to the code. It can be seen here.
Answer 0 (score: 2)
Your input data has shape (BatchSize, w, h, channels) and your model's output has shape (BatchSize, 3). The error message is telling you that the target data you are feeding it is shaped like (BatchSize, 1), which does not match that output. So something is definitely going wrong when you create train_labels.
You must shape it into (3000, 3), with a 1 at the index corresponding to each sample's class. You could build that one-hot encoding by hand (if that is feasible in your case), or use keras.utils.to_categorical(), but make sure that train_labels.shape[0] is exactly the same as train_data.shape[0]:
from keras.utils import to_categorical

train_labels = np.array([0] * (nb_train_samples // n_classes) +
                        [1] * (nb_train_samples // n_classes) +
                        [2] * (nb_train_samples // n_classes))
train_labels = to_categorical(train_labels)
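As a quick sanity check (not part of the original answer, just an illustrative sketch using the variables already defined above), you can confirm that the label array now lines up with the bottleneck features before calling fit:

print(train_data.shape)    # e.g. (3000, 4, 4, 512)
print(train_labels.shape)  # should now be (3000, 3)
assert train_labels.shape == (train_data.shape[0], n_classes)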
Another simple way to create the labels:
train_labels = np.zeros((30,3))
train_labels[:10,0] = 1.
train_labels[10:20,1] = 1.
train_labels[20:,2] = 1.
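As a side note (not part of the original answer): both constructions should produce the same (30, 3) one-hot array, because flow_from_directory with shuffle=False yields the samples grouped by class directory, ten per class in this small dataset. A quick illustrative comparison, where labels_cat is just a hypothetical name for the to_categorical version:

# Compare the hand-built one-hot matrix with the to_categorical one.
labels_cat = to_categorical(np.array([0] * 10 + [1] * 10 + [2] * 10))
print(np.array_equal(train_labels, labels_cat))  # expected: True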