我有一个图像分类任务,有一个庞大的数据集(800,000个标记数据,80,000个未标记数据和10个要分类的类)。我已经建立了模型,但是训练过程非常耗时,所以我想知道我的模型是否足够强大以完成任务:
# Input geometry: every image is resized to a 150x150 RGB tensor.
img_width = img_height = 150

# Training configuration (batch size and per-epoch sample counts).
batch_size = 64
samples_per_epoch = 671429
nb_val_samples = 167852

# Network hyper-parameters: filter count per conv block, kernel and
# pooling window sizes, and the number of output classes.
nb_filters1, nb_filters2, nb_filters3 = 32, 64, 128
conv_size, pool_size = 3, 2
classes_num = 10
# Define class weights to handle imbalanced classes.
def get_class_weights(counts, smooth_factor=0):
    """Return a {class_index: weight} dict inversely proportional to class size.

    Each weight is majority_count / class_count, so the largest class gets
    weight 1.0 and rarer classes get proportionally larger weights.

    counts        -- per-class sample counts, indexed by class label (0..n-1).
    smooth_factor -- if > 0, adds smooth_factor * max(counts) to every count
                     before weighting, damping extreme weights for rare classes.
    """
    if smooth_factor > 0:
        p = max(counts) * smooth_factor
        # BUG FIX: the original `for k in counts: k += p` only rebound the
        # loop variable and never changed the list, so smoothing was a no-op.
        counts = [count + p for count in counts]
    majority = max(counts)
    # float(majority) / count: true division regardless of Python 2/3 semantics
    # (the original float(majority / count) truncated under Python 2).
    return {cls: float(majority) / count for cls, count in enumerate(counts)}

# Per-class sample counts for the 10 classes, smoothed with factor 0.1.
class_weight = get_class_weights([37415, 9344, 128127, 80545, 60559, 64548, 31029, 105297, 309151, 13266], 0.1)
# Build the CNN: three conv/pool feature-extraction blocks followed by a
# two-layer fully-connected classifier head with a softmax output.
classifier = Sequential()

# Convolutional feature extractor: filter counts grow 32 -> 64 -> 128,
# each 3x3 'same'-padded conv followed by 2x2 max-pooling. Only the first
# layer declares the input shape.
for block_idx, n_filters in enumerate((nb_filters1, nb_filters2, nb_filters3)):
    extra = {'input_shape': (img_width, img_height, 3)} if block_idx == 0 else {}
    classifier.add(Convolution2D(n_filters, conv_size, conv_size,
                                 activation='relu', border_mode='same',
                                 **extra))
    classifier.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

# Classifier head: flatten the feature maps, then two 1024-unit ReLU
# layers (each with 20% dropout) and a softmax over the 10 classes.
classifier.add(Flatten())
classifier.add(Dense(output_dim=1024, activation='relu'))
classifier.add(Dropout(0.2))
classifier.add(Dense(output_dim=1024, activation='relu'))
classifier.add(Dropout(0.2))
classifier.add(Dense(output_dim=classes_num, activation='softmax'))

# Compile for multi-class classification with RMSprop.
classifier.compile(optimizer='RMSprop',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])