I'm trying to figure out why the length of a NumPy array shrinks when I run my model. I have 2000 images in total, split into 2 classes of 1000 each. But when I pass these images through a VGG16 model for feature extraction, it only gives me back 1998 feature vectors. Here is my code.
The VGG16 model:
from keras import backend as K
from keras.models import Sequential
from keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D

def load_vgg16(weights_path='vgg16.h5'):
    # Input shape depends on the backend's image data format
    if K.image_data_format() == 'channels_first':
        inputShape = (3, 256, 256)
    else:
        inputShape = (256, 256, 3)

    model = Sequential()
    # Block 1
    model.add(ZeroPadding2D((1, 1), input_shape=inputShape))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    # Block 2
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    # Block 3
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    # Block 4
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    # Block 5
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Load pretrained weights if a path is given
    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model
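If it helps, I believe this is the convolutional base of VGG16 (everything up to the last pooling layer), and a roughly equivalent base can also be built from keras.applications; this is just a sketch, assuming ImageNet weights are acceptable in place of my vgg16.h5 file:

from keras.applications.vgg16 import VGG16

# Convolutional base only (include_top=False), 256x256 RGB input
vgg_base = VGG16(include_top=False, weights='imagenet', input_shape=(256, 256, 3))
vgg_base.summary()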
Feature extraction via bottleneck features:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

def save_bottleneck_features(location):
    batch_size = 16
    datagen = ImageDataGenerator(rescale=1. / 255)
    model = load_vgg16()

    # Run the training images through the VGG16 base and save the features
    generator = datagen.flow_from_directory(location + '/training-data/',
                                            target_size=(image_width, image_height),
                                            batch_size=16,
                                            class_mode=None,
                                            shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, nb_training_samples / batch_size)
    np.save(open(location + '/bottleneck_features_train.npy', 'wb'),
            bottleneck_features_train)

    # Repeat with the validation data
    generator = datagen.flow_from_directory(location + '/validation-data/',
                                            target_size=(image_width, image_height),
                                            batch_size=16,
                                            class_mode=None,
                                            shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples / batch_size)
    np.save(open(location + '/bottleneck_features_validation.npy', 'wb'),
            bottleneck_features_validation)
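To double-check how many images flow_from_directory actually picks up (versus my os.listdir counts), I think something like this works, assuming a Keras 2-style directory iterator that exposes .samples and .class_indices:

from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1. / 255)
check_gen = datagen.flow_from_directory('newdata/training-data/',
                                        target_size=(256, 256),
                                        batch_size=16,
                                        class_mode=None,
                                        shuffle=False)
# flow_from_directory also prints "Found N images belonging to K classes."
print(check_gen.samples)        # number of image files it actually discovered
print(check_gen.class_indices)  # subfolder name -> class index mapping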
Running the above function:
import os

top_model_weights_path = '/top_model_weights.h5'
location = 'newdata'
nb_epoch = 50
image_width, image_height = 256, 256  # matches the 256x256 input shape of the VGG16 base

# Count the images per class (damaged vs. whole cars) in the training folder
training_samples = [len(os.listdir('newdata/training-data/' + i))
                    for i in sorted(os.listdir('newdata/training-data/'))]
nb_training_samples = sum(training_samples)

# Count the images per class in the validation folder
validation_samples = [len(os.listdir('newdata/validation-data/' + i))
                      for i in sorted(os.listdir('newdata/validation-data/'))]
nb_validation_samples = sum(validation_samples)

print(training_samples)
print(validation_samples)

save_bottleneck_features('newdata')
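This is roughly how I inspect the arrays that the function above writes out (plain NumPy, nothing Keras-specific):

import numpy as np

train_features = np.load('newdata/bottleneck_features_train.npy')
val_features = np.load('newdata/bottleneck_features_validation.npy')
print(train_features.shape)  # first dimension = number of feature vectors; 1998 here instead of 2000
print(val_features.shape)    # 1840 here instead of 1836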
So when I run the code above, I expect the array bottleneck_features_train to have a total length of 2000, but I only get 1998. Similarly, bottleneck_features_validation comes out with a total length of 1840, whereas I expected 1836 (927 + 909). The outputs of training_samples and validation_samples are shown below.
[1000, 1000]
[927, 909]
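Working through the step arithmetic myself (a rough check, under my assumption that a fractional steps value effectively gets rounded up to a whole batch):

import math

batch_size = 16
nb_training_samples = 1000 + 1000    # = 2000
nb_validation_samples = 927 + 909    # = 1836

print(nb_training_samples / float(batch_size))    # 125.0  -> a whole number of batches
print(nb_validation_samples / float(batch_size))  # 114.75 -> not a whole number
# if 114.75 is rounded up to 115 batches, that would give 115 * 16 = 1840 samples
print(int(math.ceil(nb_validation_samples / float(batch_size))) * batch_size)  # 1840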
This makes me think I am passing the wrong number of steps to model.predict_generator(). When I then try to label these images with this code:
# Load the saved bottleneck features and build the matching 0/1 labels
training_data = np.load(open(location + '/bottleneck_features_train.npy', 'rb'))
training_label = np.array([0] * training_samples[0] +
                          [1] * training_samples[1])

validation_data = np.load(open(location + '/bottleneck_features_validation.npy', 'rb'))
validation_label = np.array([0] * validation_samples[0] +
                            [1] * validation_samples[1])
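Before fitting I now also run a quick length comparison between the features and the labels built above (just a diagnostic sketch):

# number of extracted feature vectors vs. number of labels
print(training_data.shape[0], len(training_label))      # 1998 vs. 2000 in my case
print(validation_data.shape[0], len(validation_label))  # 1840 vs. 1836 in my case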
and then try to fit the model, I get an error saying that the training data and the training labels have different lengths.
How do I fix this so I can get on with fitting the model? Thanks for the help. Cheers!