keras分类:输入数据形状

时间:2017-09-07 15:15:39

标签: tensorflow keras classification

我正在尝试修改keras'blog中的分类示例,以便对3个不同类别的图像进行分类。

我有3000张图像(3 x 1000)的训练和1200(3 x 400)的验证。 修改代码以对3个clases进行分类。

代码如下:

import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# dimensions of our images.
img_width, img_height = 150, 150

top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 3000
nb_validation_samples = 1200
epochs = 50
batch_size = 16

n_classes = 3


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'wb'),
            bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'wb'),
            bottleneck_features_validation)


def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy','rb'))
    train_labels = np.array([0] * (nb_train_samples // n_classes) + [1] * (nb_train_samples // n_classes) + \
                            [2] * (nb_train_samples // n_classes))

    validation_data = np.load(open('bottleneck_features_validation.npy','rb'))
    validation_labels = np.array([0] * (nb_train_samples // n_classes) + [1] * (nb_train_samples // n_classes) + \
                                 [2] * (nb_train_samples // n_classes))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size, \
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)

当我最终执行这两个函数时:

save_bottlebeck_features()
train_top_model()

第二个函数返回以下错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-143-070a6188c611> in <module>()
      4 print(validation_labels.shape)
      5 
----> 6 train_top_model()

<ipython-input-129-ea2b02024693> in train_top_model()
     64                   loss='categorical_crossentropy', metrics=['accuracy'])
     65 
---> 66     model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,               validation_data=(validation_data, validation_labels))
     67     model.save_weights(top_model_weights_path)

~/anaconda/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    865                               class_weight=class_weight,
    866                               sample_weight=sample_weight,
--> 867                               initial_epoch=initial_epoch)
    868 
    869     def evaluate(self, x, y, batch_size=32, verbose=1,

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1520             class_weight=class_weight,
   1521             check_batch_axis=False,
-> 1522             batch_size=batch_size)
   1523         # Prepare validation data.
   1524         do_validation = False

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
   1380                                     output_shapes,
   1381                                     check_batch_axis=False,
-> 1382                                     exception_prefix='target')
   1383         sample_weights = _standardize_sample_weights(sample_weight,
   1384                                                      self._feed_output_names)

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    142                             ' to have shape ' + str(shapes[i]) +
    143                             ' but got array with shape ' +
--> 144                             str(array.shape))
    145     return arrays
    146 

ValueError: Error when checking target: expected dense_58 to have shape (None, 3) but got array with shape (3000, 1)

如果我打印数据和标签的形状,它将返回:

print(train_labels.shape)
(3000, 3)
print(train_data.shape)
(3000, 3)
print(validation_data.shape)
(1200, 4, 4, 512)
print(validation_labels.shape)
(1200,)

修改

我正在发布完整的代码以及包含我正在考虑的图像的数据库。

可以下载数据库here

代码如下:

# dimensions of our images.
img_width, img_height = 150, 150

top_model_weights_path = 'what.h5'#'bottleneck_fc_model.h5'
train_data_dir = 'data_short/train'
validation_data_dir = 'data_short/validation'
nb_train_samples = 30
nb_validation_samples = 6
epochs = 50
batch_size = 16

n_classes = 3


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(train_data_dir, target_size=(img_width, img_height),\
                                            batch_size=batch_size, class_mode='categorical', shuffle=False)

    bottleneck_features_train = model.predict_generator(generator, nb_train_samples // batch_size)

    np.save(open('bottleneck_features_train.npy', 'wb'), bottleneck_features_train)

    generator = datagen.flow_from_directory(validation_data_dir, target_size=(img_width, img_height),\
                                            batch_size=batch_size, class_mode='categorical', shuffle=False)

    bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples // batch_size)

    np.save(open('bottleneck_features_validation.npy', 'wb'), bottleneck_features_validation)


def train_top_model():
    encoder = OneHotEncoder()
    #train_data = np.load(open('bottleneck_features_train.npy','rb'))
    train_data = np.load('bottleneck_features_train.npy')

    train_labels = np.array([0] * (nb_train_samples // n_classes) + [1] * (nb_train_samples // n_classes) + 
                            [2] * (nb_train_samples // n_classes))

    train_labels = to_categorical(train_labels)


    validation_data = np.load(open('bottleneck_features_validation.npy','rb'))
    validation_labels = np.array([0] * (nb_validation_samples // n_classes) + \
                                 [1] * (nb_validation_samples // n_classes) + \
                                 [2] * (nb_validation_samples // n_classes))

    validation_labels = to_categorical(validation_labels)

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,\
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)

给出的错误如下:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-6869607a6e44> in <module>()
----> 1 train_top_model()

<ipython-input-6-933b6592c6c1> in train_top_model()
     56     model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
     57 
---> 58     model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,              validation_data=(validation_data, validation_labels))
     59     model.save_weights(top_model_weights_path)

~/anaconda/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    861                               class_weight=class_weight,
    862                               sample_weight=sample_weight,
--> 863                               initial_epoch=initial_epoch)
    864 
    865     def evaluate(self, x, y, batch_size=32, verbose=1,

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
   1356             class_weight=class_weight,
   1357             check_batch_axis=False,
-> 1358             batch_size=batch_size)
   1359         # Prepare validation data.
   1360         if validation_data:

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
   1244                           for (ref, sw, cw, mode)
   1245                           in zip(y, sample_weights, class_weights, self._feed_sample_weight_modes)]
-> 1246         _check_array_lengths(x, y, sample_weights)
   1247         _check_loss_and_target_compatibility(y,
   1248                                              self._feed_loss_fns,

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _check_array_lengths(inputs, targets, weights)
    235                          'the same number of samples as target arrays. '
    236                          'Found ' + str(list(set_x)[0]) + ' input samples '
--> 237                          'and ' + str(list(set_y)[0]) + ' target samples.')
    238     if len(set_w) > 1:
    239         raise ValueError('All sample_weight arrays should have '

ValueError: Input arrays should have the same number of samples as target arrays. Found 16 input samples and 30 target samples.

EDIT2 解决方案:

我解决了这个问题,使代码发生了根本性的变化。可以看到here

1 个答案:

答案 0 :(得分:2)

  • 您有&#34;输入数据&#34;,这是您的图像集。 - 形状:(BatchSize,w,h,channels)
  • 你有&#34;输出数据/真值/预测&#34;哪些是班级。形状:(BatchSize,3)

错误消息告诉您,您提供的形状类似(BatchSize,1)的模型输出数据不适合模型。

因此,在创建train_labels时肯定会遇到问题。

您必须将其整形为(3000,3)。与每个班级相关的指数应为1:

  • 第1类:[1,0,0]
  • 第2类:[0,1,0]
  • 第3类:[0,0,1]

您可能已经合并了课程(如果您的情况可能的话)。

使用keras.utils.to_categorical()

但请确保train_labels.shape[0]train_data.shape[0]完全相同。

from keras.utils import to_categorical

train_labels = np.array([0] * (nb_train_samples // n_classes) + [1] * (nb_train_samples // n_classes) + 
                        [2] * (nb_train_samples // n_classes))

train_labels = to_categorical(train_labels)

另一种创建标签的简单方法:

train_labels = np.zeros((30,3))
train_labels[:10,0] = 1.
train_labels[10:20,1] = 1.
train_labels[20:,2] = 1.