Question

这是我正在尝试开发的代码，用于创建 OCR 程序，以便用户可以手写一个数字，并且该程序以高精度返回实际值。

from keras.models import load_model
from tkinter import *
import tkinter as tk
import appscript
#import win32gui
from PIL import ImageGrab, Image
import numpy as np

# Load the Keras model stored in 'mnist.h5' once, at import time;
# predict_digit() below reads this module-level object on every call.
model = load_model('mnist.h5')

def predict_digit(img):
    """Classify a hand-drawn digit image with the module-level ``model``.

    Args:
        img: A PIL image of the drawing. It is expected to contain dark
            strokes on a light background, which is what the white drawing
            canvas with black ink produces.

    Returns:
        A ``(digit, confidence)`` tuple: the index of the highest-scoring
        class and that class's score as returned by the model.
    """
    # Resize image to 28x28 pixels.
    img = img.resize((28, 28))
    # Convert RGB to grayscale.
    img = img.convert('L')
    pixels = np.array(img, dtype='float32')
    # The canvas is black ink on a white background. Assuming the model was
    # trained on standard MNIST (light digit on a dark background), the
    # intensities must be inverted; otherwise every drawing is an almost
    # all-white image and the prediction barely depends on the strokes.
    pixels = 255.0 - pixels
    # Reshape to the (batch, height, width, channels) input and scale to [0, 1].
    pixels = pixels.reshape(1, 28, 28, 1) / 255.0
    # Predict the class scores for the single image in the batch.
    res = model.predict(pixels)[0]
    digit = np.argmax(res)
    return digit, res[digit]

class App(tk.Tk):
    """Main window: a drawing canvas, a result label and two buttons.

    The user draws on the canvas with the left mouse button held down.
    "Recognise" captures the canvas area from the screen and shows the
    prediction returned by ``predict_digit``; "Clear" erases the canvas.
    """

    def __init__(self):
        """Create the widgets, lay them out on a grid and bind drawing."""
        super().__init__()

        # Most recent pointer position seen while drawing.
        self.x = self.y = 0

        # Creating elements
        self.canvas = tk.Canvas(self, width=300, height=300, bg="white", cursor="cross")
        self.label = tk.Label(self, text="Draw..", font=("Helvetica", 48))
        self.classify_btn = tk.Button(self, text="Recognise", command=self.classify_handwriting)
        self.button_clear = tk.Button(self, text="Clear", command=self.clear_all)

        # Grid structure
        self.canvas.grid(row=0, column=0, pady=2, sticky=tk.W)
        self.label.grid(row=0, column=1, pady=2, padx=2)
        self.classify_btn.grid(row=1, column=1, pady=2, padx=2)
        self.button_clear.grid(row=1, column=0, pady=2)

        # Draw while the left mouse button is held down and the pointer moves.
        self.canvas.bind("<B1-Motion>", self.draw_lines)

    def clear_all(self):
        """Remove every item drawn on the canvas."""
        self.canvas.delete("all")

    def classify_handwriting(self):
        """Grab the canvas area from the screen and display the prediction."""
        # Screen coordinates of the canvas rectangle.
        left = self.canvas.winfo_rootx()
        top = self.canvas.winfo_rooty()
        right = left + self.canvas.winfo_width()
        bottom = top + self.canvas.winfo_height()

        # Shrink the capture box by a few pixels on every side
        # (presumably to keep the canvas border out of the image).
        inset = 4
        im = ImageGrab.grab(bbox=(left + inset, top + inset, right - inset, bottom - inset))

        digit, acc = predict_digit(im)
        self.label.configure(text=f"{digit}, {int(acc * 100)}%")

    def draw_lines(self, event):
        """Paint a filled black dot centred on the pointer position."""
        self.x = event.x
        self.y = event.y
        r = 8  # brush radius in pixels
        self.canvas.create_oval(self.x - r, self.y - r, self.x + r, self.y + r, fill='black')
       
# Build the main window, then hand control to Tk's event loop.
app = App()
app.mainloop()

这段代码的问题在于：无论我画什么内容，也无论我重画多少次，它总是返回 0。

如果有熟悉 Python/Keras/Tkinter 的人能帮我定位并解决这个问题，我将不胜感激。

模型的源代码见此处（here）。

注意：此代码运行正常，没有报错。这是程序窗口的屏幕截图：

Answer 1

程序中使用的神经网络代码不够准确，无法做出正确的预测。训练准确率如下：

代码的实际逻辑没有任何问题。改进神经网络代码将是解决这个问题的方法。

Python 中的 OCR 机器学习 - 使用 keras 训练模型

1 个答案: