在下面的脚本中我想训练一个带有mnist数据的CNN。 数据位于dataset_dir中,所有图像都是.png。标签是图像的文件夹。 现在我想用生成器训练CNN,它生成图像路径和标签。
from __future__ import print_function
import cv2, numpy as np
import random, os, glob, time
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' #lies alle bilder ein, egal ob train oder test set, wir splitten spaeter
NUM_CLASSES = 10
BATCH_SIZE = 128
NUM_EPOCHS = 1
def paths_and_labels(dataset_dir):
# dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' # dataset_dir soll so aussehen, die zwei ** sind die klassennamen, ordner** ist also ordner_auto, ordner_person,...
paths = glob.glob(dataset_dir)
labels = []
labels.append([os.path.basename(os.path.dirname(path)) for path in paths])
labels = labels[0] #labels als string
class_names = list(set(labels)) # welche klassen gibt es, string
labels_classes = np.zeros([len(labels)], dtype=int)
for i, class_name in enumerate(class_names):
class_index = [j for j, x in enumerate(labels) if x == class_name]
labels_classes[class_index] = i # labels als int, um die klasse als string zu bekommen, mach = class_names[labels_class[nr]]
paths_and_labels_classes = list(zip(paths, labels_classes))
random.shuffle(paths_and_labels_classes)
paths, labels_classes = zip(*paths_and_labels_classes)
train_paths = paths[0:int(0.6 * len(paths))]
train_labels = labels_classes[0:int(0.6 * len(labels_classes))]
test_paths = paths[int(0.8 * len(paths)):]
test_labels = labels_classes[int(0.8 * len(labels_classes)):]
val_paths = paths[int(0.6 * len(paths)):int(0.8 * len(paths))]
val_labels = labels_classes[int(0.6 * len(paths)):int(0.8 * len(paths))]
return train_paths, train_labels, test_paths, test_labels, val_paths, val_labels, class_names
def generator(image_paths, labels, batch_size):
batch_features = np.zeros((batch_size, 28, 28, 1)) #oder 224,224,3
batch_labels = np.zeros((batch_size, NUM_CLASSES))
while True:
for i in range(batch_size):
# choose random index of one image in image_paths
index = np.random.choice(len(image_paths), 1, replace=False)[0]
im = cv2.resize(cv2.imread(image_paths[index],0), (28, 28)).astype(np.float32)
im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg
batch_features[i] = im
batch_labels[i][labels[index]] = 1
batch_features = batch_features.astype('float32')
batch_features /= 255
yield (batch_features, batch_labels)
train_paths, train_labels, test_paths, test_labels, val_paths, val_labels, c_names = paths_and_labels(dataset_dir)
input_shape = (28, 28, 1)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.summary()
#my_callback = [keras.callbacks.TensorBoard(log_dir='/home/viktor/PycharmProjects/CNN_Object_Classification/logs2', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)]
start_time = time.clock()
model.fit_generator(generator=generator(train_paths, train_labels, BATCH_SIZE), steps_per_epoch=int(len(train_paths)/BATCH_SIZE), epochs=NUM_EPOCHS)#,callbacks=my_callback)
elapsed_time = time.clock() - start_time
print('elapsed time: ', elapsed_time)
结果不好
1/281 [..............................] - ETA: 493s - loss: 16.0029 - acc: 0.1250
2/281 [..............................] - ETA: 255s - loss: 15.9644 - acc: 0.0820
3/281 [..............................] - ETA: 175s - loss: 39.3005 - acc: 0.0729
4/281 [..............................] - ETA: 135s - loss: 49.8761 - acc: 0.0742
5/281 [..............................] - ETA: 113s - loss: 55.0494 - acc: 0.0703
277/281 [============================>.] - ETA: 0s - loss: 25.6649 - acc: 0.0346
278/281 [============================>.] - ETA: 0s - loss: 25.6554 - acc: 0.0345
279/281 [============================>.] - ETA: 0s - loss: 25.6460 - acc: 0.0343
280/281 [============================>.] - ETA: 0s - loss: 25.6367 - acc: 0.0342
281/281 [==============================] - 22s - loss: 25.6274 - acc: 0.0341
elapsed time: 37.915506
如果我尝试同样的事情,但是在将所有图像提供给生成器之前加载所有图像(而不仅仅是给出路径),那就更好了。
from __future__ import print_function
import cv2, numpy as np
import random, os, glob, time
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' #lies alle bilder ein, egal ob train oder test set, wir splitten spaeter
NUM_CLASSES = 10
BATCH_SIZE = 128
NUM_EPOCHS = 1
def paths_and_labels(dataset_dir):
# dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' # dataset_dir soll so aussehen, die zwei ** sind die klassennamen, ordner** ist also ordner_auto, ordner_person,...
paths = glob.glob(dataset_dir)
labels = []
labels.append([os.path.basename(os.path.dirname(path)) for path in paths])
labels = labels[0] #labels als string
class_names = list(set(labels)) # welche klassen gibt es, string
labels_classes = np.zeros([len(labels)], dtype=int)
for i, class_name in enumerate(class_names):
class_index = [j for j, x in enumerate(labels) if x == class_name]
labels_classes[class_index] = i # labels als int, um die klasse als string zu bekommen, mach = class_names[labels_class[nr]]
paths_and_labels_classes = list(zip(paths, labels_classes))
random.shuffle(paths_and_labels_classes)
paths, labels_classes = zip(*paths_and_labels_classes)
train_paths = paths[0:int(0.6 * len(paths))]
train_labels = labels_classes[0:int(0.6 * len(labels_classes))]
test_paths = paths[int(0.8 * len(paths)):]
test_labels = labels_classes[int(0.8 * len(labels_classes)):]
val_paths = paths[int(0.6 * len(paths)):int(0.8 * len(paths))]
val_labels = labels_classes[int(0.6 * len(paths)):int(0.8 * len(paths))]
train_images = np.zeros((len(train_paths), 28, 28, 1)) # oder 224,224,3
train_labels_bin = np.zeros((len(train_paths), NUM_CLASSES))
for i in range(len(train_paths)):
im = cv2.resize(cv2.imread(train_paths[i], 0), (28, 28)).astype(np.float32)
im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg
train_images[i] = im
train_labels_bin[i][train_labels[i]] = 1
train_images = train_images.astype('float32')
train_images /= 255
return train_images, train_labels_bin
def generator(images, labels, batch_size):
batch_features = np.zeros((batch_size, 28, 28, 1),np.float32) #oder 224,224,3
batch_labels = np.zeros((batch_size, NUM_CLASSES))
while True:
for i in range(batch_size):
# choose random index of one image in image_paths
index = np.random.choice(len(images), 1, replace=False)[0]
batch_features[i] = images[index]
batch_labels[i] = labels[index]
yield (batch_features, batch_labels)
train_images, train_labels_bin = paths_and_labels(dataset_dir)
input_shape = (28, 28, 1)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.summary()
#my_callback = [keras.callbacks.TensorBoard(log_dir='/home/viktor/PycharmProjects/CNN_Object_Classification/logs2', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)]
start_time = time.clock()
model.fit_generator(generator=generator(train_images, train_labels_bin, BATCH_SIZE), steps_per_epoch=int(len(train_images)/BATCH_SIZE), epochs=NUM_EPOCHS)#,callbacks=my_callback)
elapsed_time = time.clock() - start_time
print('elapsed time: ', elapsed_time)
结果:
1/281 [..............................] - ETA: 461s - loss: 2.3143 - acc: 0.0859
2/281 [..............................] - ETA: 238s - loss: 2.3017 - acc: 0.1133
3/281 [..............................] - ETA: 164s - loss: 2.2830 - acc: 0.1536
4/281 [..............................] - ETA: 128s - loss: 2.2632 - acc: 0.1953
5/281 [..............................] - ETA: 106s - loss: 2.2439 - acc: 0.2219
277/281 [============================>.] - ETA: 0s - loss: 0.4778 - acc: 0.8619
278/281 [============================>.] - ETA: 0s - loss: 0.4766 - acc: 0.8623
279/281 [============================>.] - ETA: 0s - loss: 0.4759 - acc: 0.8626
280/281 [============================>.] - ETA: 0s - loss: 0.4747 - acc: 0.8629
281/281 [==============================] - 23s - loss: 0.4735 - acc: 0.8632
elapsed time: 37.089643
我已经检查过发电机的输出是否相同。它是。 所以我的问题是,为什么第一个脚本的损失如此糟糕?一切都是一样的。唯一的区别是,在第一个脚本中我将数据加载到生成器函数中。 在第二个脚本中,我将数据加载到生成器函数之外。
答案 0 :(得分:1)
我发现了我的错误
正确的生成器fkt应该是:
def generator(image_paths, labels, batch_size):
while True:
batch_features = np.zeros((batch_size, 28, 28, 1)) # oder 224,224,3
batch_labels = np.zeros((batch_size, NUM_CLASSES))
for i in range(batch_size):
# choose random index of one image in image_paths
index = np.random.choice(len(image_paths), 1, replace=False)[0]
im = cv2.resize(cv2.imread(image_paths[index],0), (28, 28)).astype(np.float32)
im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg
batch_features[i] = im
batch_labels[i][labels[index]] = 1
batch_features = batch_features.astype('float32')
batch_features /= 255
yield (batch_features, batch_labels)