Question

我编写了一个程序，使用 EMNIST By_Class 数据集和 Keras 来识别字符。我设计的 CNN 架构在 GPU 上训练了 10 个小时，得到了较高的准确率和较低的损失，
如下面的训练曲线图和预测结果图所示： enter image description here

enter image description here

我的程序用一个 Python 文件进行训练，预测时则先把单词切分成单个字符，再逐个字符进行识别。训练文件包含以下架构：

# Training script: builds a CNN that takes flat 784-value inputs, reshapes
# them to 28x28x1 images and classifies them into 62 classes, then trains
# it and reports the accuracy on the test data.

# NOTE(review): this pins the Keras learning phase to "training" for the
# whole session. Presumably Dropout then stays active while the
# validation_data / evaluate() metrics are computed -- confirm this is
# intended; fit() normally switches phases on its own.
K.set_learning_phase(1)

model = Sequential()

# Inputs arrive as flat vectors; restore the single-channel image layout.
model.add(Reshape((28, 28, 1), input_shape=(784,)))

# First convolutional stage: two 5x5 convolutions with 32 filters.
model.add(Convolution2D(32, (5, 5), input_shape=(28, 28, 1),
                        activation='relu', padding='same',
                        kernel_constraint=maxnorm(3)))
model.add(Convolution2D(32, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolutional stage: two 5x5 convolutions with 64 filters.
model.add(Convolution2D(64, (5, 5), activation='relu', padding='same',
                        kernel_constraint=maxnorm(3)))
model.add(Convolution2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(1024, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dense(512, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.5))
model.add(Dense(62, activation='softmax'))

opt = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None,
                        decay=0.0)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

# The test split doubles as validation data here, so the 'val_*' curves in
# `history` are test-set curves.
history = model.fit(train_images, train_labels,
                    validation_data=(test_images, test_labels),
                    batch_size=128, epochs=200)

# Evaluate the model on the test data (this can take a while).
scores = model.evaluate(test_images, test_labels, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

以下是用于加载模型并预测字符的代码：

# Prediction script: loads the saved model, segments 'example.png' into
# one region per external contour, classifies every region and prints the
# predicted characters joined into a string.

# Load the architecture (JSON) and the weights (HDF5); the context manager
# closes the file even if reading fails.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights('model.h5')
model = loaded_model

print('Model successfully loaded')

# Class index -> character: digits, then upper case, then lower case.
characters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
              'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
              'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
              'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
              'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Margin (in pixels) added around every detected bounding box.
pad = 10

# enter input image here
image = cv2.imread('example.png')
if image is None:
    # cv2.imread() signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read 'example.png'")

height, width = image.shape[:2]

# resizing the image to find spaces better
image = cv2.resize(image, dsize=(width * 5, height * 4),
                   interpolation=cv2.INTER_CUBIC)
img_h, img_w = image.shape[:2]

# grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# binary (inverted: dark pixels become the white foreground)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# dilation
kernel = np.ones((5, 5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)

# adding GaussianBlur
gsblur = cv2.GaussianBlur(img_dilation, (5, 5), 0)

# find contours; OpenCV 3.x returns three values and other versions two,
# so take the last two to work with either.
ctrs, hier = cv2.findContours(gsblur.copy(), cv2.RETR_EXTERNAL,
                              cv2.CHAIN_APPROX_SIMPLE)[-2:]

# sort contours left to right by the x coordinate of their bounding box
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

# Draw every padded bounding box on a copy of the image for inspection.
dp = image.copy()
for ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    cv2.rectangle(dp, (x - pad, y - pad), (x + w + pad, y + h + pad),
                  (90, 0, 255), 9)

# OpenCV stores BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(dp, cv2.COLOR_BGR2RGB))
plt.show()
plt.close()

m = list()     # 28x28 grayscale crops, in reading order
pchl = list()  # predicted class-index arrays, one per crop
for ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    # Clamp the padded box to the image: a negative start index would wrap
    # around in NumPy slicing and yield an empty or wrong crop.
    top, bottom = max(y - pad, 0), min(y + h + pad, img_h)
    left, right = max(x - pad, 0), min(x + w + pad, img_w)
    roi = image[top:bottom, left:right]
    roi = cv2.resize(roi, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Scale to [0, 1] and invert, then flatten to the (1, 784) row the
    # model's Reshape input layer takes.
    t = (1 - roi / 255.0).reshape(1, 784)
    m.append(roi)
    # argmax over the class axis gives the same result as the removed
    # Sequential.predict_classes() and works on old and new Keras.
    pred = np.argmax(model.predict(t), axis=-1)
    pchl.append(pred)

pcw = [characters[pred[0]] for pred in pchl]

interp = 'bilinear'
if pchl:
    # squeeze=False keeps `axs` two-dimensional even when there is only one
    # row; otherwise a single Axes object is returned and cannot be indexed.
    fig, axs = plt.subplots(nrows=len(sorted_ctrs), sharex=True,
                            figsize=(1, len(sorted_ctrs)), squeeze=False)
    for ax, crop, letter in zip(axs[:, 0], m, pcw):
        ax.set_title('-------> predicted letter: ' + letter, x=2.5, y=0.24)
        ax.imshow(crop, interpolation=interp)

    plt.show()

predstring = ''.join(pcw)
print('Predicted String: ' + predstring)

用于绘制训练曲线图的代码：

# Plot the training history returned by model.fit() and save one image for
# accuracy and one for loss.

# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; use whichever key this history contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# summarize history for accuracy
plt.figure()  # fresh figure so the two charts never share axes
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.grid()
plt.savefig('Accuracy')
plt.close()  # without this the loss curves are drawn over the accuracy ones

# summarize history for loss
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.grid()
plt.savefig('Loss')
plt.close()

我得到的输出是我上面放的图像，感谢您的建议和帮助。

Answer 1

您的模型明显过拟合了：从图中可以看出，测试准确率与训练准确率相差明显。

您可能希望研究正则化技术来避免这种情况。

准确度高，损失少，分类错误预测Tf / keras

1 个答案: