Python / OpenCV - 检测不到网格

时间:2015-03-05 14:53:36

标签: python opencv

以下脚本来自http://projectproto.blogspot.co.uk/2014/07/opencv-python-2048-game-solver.html

import cv2
import numpy as np
import win32api, win32gui, win32ui, win32con, win32com.client
from PIL import Image, ImageFont, ImageDraw, ImageOps

# create training model based on the given TTF font file
# http://projectproto.blogspot.com/2014/07/opencv-python-digit-recognition.html
def createDigitsModel(fontfile, digitheight):
    """Train a nearest-neighbour digit classifier from a TrueType font.

    Each digit 0-9 is rendered with the given font, cropped to its bounding
    box, inverted, blurred, thresholded and resized to a fixed size; the
    flattened pixels become one training sample labelled with the digit.

    Args:
        fontfile: path of the TTF file passed to ``ImageFont.truetype``.
        digitheight: integer font size in pixels; it also fixes the sample
            size (``digitheight * (digitheight // 2)`` values per digit).

    Returns:
        A trained ``cv2.KNearest`` model (OpenCV 2.4 API).
    """
    font = ImageFont.truetype(fontfile, digitheight)
    # Floor division keeps every size an int even under true division
    # (Python 3 or ``from __future__ import division``).
    halfheight = digitheight // 2
    featurelen = digitheight * halfheight

    rows = []
    responses = []
    for n in range(10):
        pil_im = Image.new("RGB", (digitheight, digitheight*2))
        ImageDraw.Draw(pil_im).text((0, 0), str(n), font=font)
        pil_im = pil_im.crop(pil_im.getbbox())
        pil_im = ImageOps.invert(pil_im)
        #pil_im.save(str(n) + ".png")

        # convert to cv image
        # NOTE(review): pil_im is in "RGB" mode while the conversion code is
        # RGBA2BGRA; kept as in the original script - confirm this is intended.
        cv_image = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGBA2BGRA)
        gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        # Named constants replace the original magic numbers 1, 1.
        thresh = cv2.adaptiveThreshold(blur, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)

        # Same target size as the one used when classifying tile digits.
        roi = cv2.resize(thresh, (digitheight, halfheight))
        rows.append(roi.reshape((1, featurelen)))
        responses.append(n)

    # Stack once instead of re-allocating the matrix on every np.append.
    samples = np.vstack(rows).astype(np.float32)
    responses = np.array(responses, np.float32)

    model = cv2.KNearest()
    model.train(samples, responses)
    return model

class Board(object):
    """Screen-scraped view of a 4x4 2048 board shown in another window.

    The constructor looks up a top-level window by title, copies its pixels
    through GDI, locates the board and its 16 tiles with OpenCV contours and
    trains a digit classifier sized to the tiles.  The remaining methods
    either re-read the tile values from the screen or simulate moves on a
    flat, row-major list of 16 tile values (0 means an empty cell).
    """

    UP, DOWN, LEFT, RIGHT = 1, 2, 3, 4
    FONT = "font/ClearSans-Bold.ttf"

    def __init__(self, clientwindowtitle):
        """Attach to the window whose title contains *clientwindowtitle*.

        If the window, the board or the tiles cannot be found the object is
        left with empty defaults (``hwnd`` 0, no tiles, no tile numbers)
        instead of raising.
        """
        # Defaults first, so every early return below leaves a usable object.
        self.tiles, self.tileheight, self.contour = [], 0, None
        self.tilenumbers = []
        self.digitheight = 0
        self.digitsmodel = None

        self.hwnd = self.getClientWindow(clientwindowtitle)
        if not self.hwnd:
            return
        self.hwndDC = win32gui.GetWindowDC(self.hwnd)
        self.mfcDC = win32ui.CreateDCFromHandle(self.hwndDC)
        self.saveDC = self.mfcDC.CreateCompatibleDC()

        # Capture rectangle: client size, with the origin shifted by the
        # frame and caption metrics and the height enlarged by two frames.
        self.cl, self.ct, right, bot = win32gui.GetClientRect(self.hwnd)
        self.cw, self.ch = right - self.cl, bot - self.ct
        self.cl += win32api.GetSystemMetrics(win32con.SM_CXSIZEFRAME)
        self.ct += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME)
        self.ct += win32api.GetSystemMetrics(win32con.SM_CYCAPTION)
        self.ch += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME) * 2

        self.saveBitMap = win32ui.CreateBitmap()
        self.saveBitMap.CreateCompatibleBitmap(self.mfcDC, self.cw, self.ch)
        self.saveDC.SelectObject(self.saveBitMap)

        self.tiles, self.tileheight, self.contour = self.findTiles(self.getClientFrame())
        if not self.tiles:
            return
        # Floor division: the value is used as a font size and array dimension.
        self.digitheight = self.tileheight // 2
        self.digitsmodel = createDigitsModel(self.FONT, self.digitheight)

        self.update()

    def getClientWindow(self, windowtitle):
        """Return the handle of the first top-level window whose title
        contains *windowtitle* (case-insensitive), or 0 if none matches."""
        wanted = windowtitle.lower()
        found = []

        def enum_cb(hwnd, results):
            results.append((hwnd, win32gui.GetWindowText(hwnd)))

        win32gui.EnumWindows(enum_cb, found)
        for hwnd, title in found:
            if wanted in title.lower():
                return hwnd
        return 0

    def getClientFrame(self):
        """Copy the capture rectangle of the window and return it as an
        OpenCV image."""
        self.saveDC.BitBlt((0, 0), (self.cw, self.ch),
                           self.mfcDC, (self.cl, self.ct), win32con.SRCCOPY)

        bmpinfo = self.saveBitMap.GetInfo()
        bmpstr = self.saveBitMap.GetBitmapBits(True)

        # The raw bitmap bytes are decoded as 'BGRX' (4 bytes per pixel).
        pil_img = Image.frombuffer('RGB',
                                   (bmpinfo['bmWidth'], bmpinfo['bmHeight']),
                                   bmpstr, 'raw', 'BGRX', 0, 1)

        array = np.array(pil_img)
        cvimage = cv2.cvtColor(array, cv2.COLOR_RGBA2BGRA)
        return cvimage

    @staticmethod
    def _findBoard(contours):
        """Return the widest roughly square contour (wider than 10 px), or None."""
        ww, sqcnt = 10, None
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if w > ww and abs(w - h) < w // 10:
                ww = w
                sqcnt = cnt
        return sqcnt

    def findTiles(self, cvframe):
        """Locate the board and derive the 16 tile rectangles.

        Returns:
            ``(tiles, avgh, board)``: *tiles* is a row-major list of
            ``[x0, y0, x1, y1]`` rectangles (empty on failure), *avgh* the
            average tile height in pixels (0 on failure) and *board* the
            board contour (``None`` when no board was found).
        """
        tiles, avgh = [], 0

        gray = cv2.cvtColor(cvframe, cv2.COLOR_BGRA2GRAY)
        # Named constants replace the original magic numbers 1, 1.
        thresh = cv2.adaptiveThreshold(gray, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
        # [-2:] copes with OpenCV builds that return (image, contours, hierarchy).
        contours, hierarchy = cv2.findContours(
            thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

        board = self._findBoard(contours)
        # "is None", not "== None": the contour is a NumPy array, for which
        # "==" is (or will become) an element-wise comparison.
        if board is None:
            print('board not found!')
            return tiles, avgh, board

        bx, by, bw, bh = cv2.boundingRect(board)
        #cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
        #cv2.imshow('board',cvframe)
        #cv2.waitKey(0)
        #cv2.destroyWindow( 'board' )

        # A tile candidate is between 4/5 of and a full quarter of the board
        # height in both dimensions and lies below the top edge of the board.
        maxh = bh // 4
        minh = (maxh * 4) // 5
        count = 0
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if y > by and minh < w < maxh and minh < h < maxh:
                avgh += h
                count += 1
        if not count:
            print('no tile found!')
            return tiles, avgh, board

        avgh = avgh // count
        margin = (bh - avgh * 4) // 5
        for row in range(4):
            for col in range(4):
                x0 = bx + avgh * col + margin * (col + 1)
                y0 = by + avgh * row + margin * (row + 1)
                tiles.append([x0, y0, x0 + avgh, y0 + avgh])
                #cv2.rectangle(cvframe,(x0,y0),(x1,y1),(0,255,0),2)
        #cv2.imshow('tiles',cvframe)
        #cv2.waitKey(0)
        #cv2.destroyWindow( 'tiles' )
        return tiles, avgh, board

    def getTileThreshold(self, tileimage):
        """Binarise one tile image.

        The most frequent gray level is taken as background; the foreground
        is the next frequent level that differs enough from it.  The image
        is inverted when the background is brighter than the foreground.
        """
        gray = cv2.cvtColor(tileimage, cv2.COLOR_BGR2GRAY)
        row, col = gray.shape
        tmp = gray.copy().reshape(1, row * col)
        counts = np.bincount(tmp[0])

        # Gray levels ordered by decreasing frequency, ignoring rare ones.
        modes, freqs = [], []
        for freq in np.sort(counts)[::-1]:
            if freq < 4:
                break
            modes.append(np.where(counts == freq)[0][0])
            freqs.append(freq)

        bg, fg = modes[0], modes[0]
        for fg in modes:
            #if abs(bg-fg)>=48:
            if abs(bg - fg) > 32 and abs(fg - 150) > 4:  # 150?!
                break
        #print bg, fg
        if bg > fg:  # needs dark background ?
            tmp = 255 - tmp
            bg, fg = 255 - bg, 255 - fg

        tmp = tmp.reshape(row, col)
        ret, thresh = cv2.threshold(tmp, (bg + fg) // 2, 255, cv2.THRESH_BINARY)
        return thresh

    @staticmethod
    def _guessNumber(digits):
        """Return the power of two (2**1 .. 2**15) whose set of decimal
        digits equals the set of recognised *digits*, or 0 if none does."""
        seen = set(digits)
        for i in range(1, 16):
            nn = 2 ** i
            if seen == set(int(c) for c in str(nn)):
                return nn
        return 0

    def getTileNumbers(self, cvframe):
        """Recognise the number on every tile of *cvframe*.

        Draws debugging rectangles into *cvframe* and returns
        ``(numbers, outframe)`` where *numbers* holds one value per tile
        (0 when nothing was recognised) and *outframe* is a black image
        with the tile outlines and recognised digits drawn on it.
        """
        numbers = []
        outframe = np.zeros(cvframe.shape, np.uint8)
        dh = self.digitheight

        for x0, y0, x1, y1 in self.tiles:
            # tileimage is a view: the rectangle drawn on cvframe right
            # below is part of it, exactly as in the original script.
            tileimage = cvframe[y0:y1, x0:x1]
            cv2.rectangle(cvframe, (x0, y0), (x1, y1), (0, 255, 0), 2)
            cv2.rectangle(outframe, (x0, y0), (x1, y1), (0, 255, 0), 1)

            thresh = self.getTileThreshold(tileimage)
            contours, hierarchy = cv2.findContours(
                thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

            digits = []
            for cnt in contours:
                x, y, w, h = cv2.boundingRect(cnt)
                # Keep upright blobs whose height is plausible for a digit.
                if h > w and h > (dh * 1) // 5 and h < (dh * 6) // 5:
                    cv2.rectangle(cvframe, (x0 + x, y0 + y), (x0 + x + w, y0 + y + h), (0, 0, 255), 1)
                    roi = thresh[y:y + h, x:x + w]
                    roi = cv2.resize(roi, (dh, dh // 2))
                    roi = roi.reshape((1, dh * (dh // 2)))
                    roi = np.float32(roi)
                    retval, results, neigh_resp, dists = self.digitsmodel.find_nearest(roi, k=1)
                    digit = int(results[0][0])
                    digits.append(digit)
                    cv2.putText(outframe, str(digit), (x0 + x, y0 + y + h), 0, float(h) / 24, (0, 255, 0))

            numbers.append(self._guessNumber(digits))
        return numbers, outframe

    def getWindowHandle(self):
        """Return the window handle found by the constructor (0 if none)."""
        return self.hwnd

    def getBoardContour(self):
        """Return the board contour found by findTiles (None if none)."""
        return self.contour

    def update(self):
        """Grab a new frame and re-read the tile numbers from it.

        Returns ``(tilenumbers, frame, outframe)``.
        """
        frame = self.getClientFrame()
        self.tilenumbers, outframe = self.getTileNumbers(frame)
        return self.tilenumbers, frame, outframe

    def copyTileNumbers(self):
        """Return a shallow copy of the last recognised tile numbers."""
        return self.tilenumbers[:]

    def getCell(self, tiles, x, y):
        """Return the value at column *x*, row *y* of the flat list *tiles*."""
        return tiles[(y * 4) + x]

    def setCell(self, tiles, x, y, v):
        """Store *v* at column *x*, row *y* of *tiles* (in place) and return *tiles*."""
        tiles[(y * 4) + x] = v
        return tiles

    def getCol(self, tiles, x):
        """Return column *x* as a list, top to bottom."""
        return [self.getCell(tiles, x, i) for i in range(4)]

    def setCol(self, tiles, x, col):
        """Overwrite column *x* of *tiles* with *col* (in place) and return *tiles*."""
        for i in range(4):
            self.setCell(tiles, x, i, col[i])
        return tiles

    def getLine(self, tiles, y):
        """Return row *y* as a list, left to right."""
        return [self.getCell(tiles, i, y) for i in range(4)]

    def setLine(self, tiles, y, line):
        """Overwrite row *y* of *tiles* with *line* (in place) and return *tiles*."""
        for i in range(4):
            self.setCell(tiles, i, y, line[i])
        return tiles

    @staticmethod
    def _canShift(cells, towardStart):
        """Tell whether a 4-cell line changes when pushed toward index 0
        (*towardStart* true) or toward index 3 (false)."""
        for i in range(3):
            near, far = cells[i], cells[i + 1]
            if near != 0 and near == far:
                return True  # two equal neighbours can merge
            if towardStart and near == 0 and far != 0:
                return True  # a tile can slide into the gap before it
            if not towardStart and far == 0 and near != 0:
                return True  # a tile can slide into the gap after it
        return False

    def validMove(self, tilenumbers, direction):
        """Return True if moving in *direction* would change *tilenumbers*.

        Unknown direction values yield False.
        """
        if direction in (self.UP, self.DOWN):
            getter = self.getCol
        elif direction in (self.LEFT, self.RIGHT):
            getter = self.getLine
        else:
            return False
        towardStart = direction in (self.UP, self.LEFT)
        return any(self._canShift(getter(tilenumbers, i), towardStart)
                   for i in range(4))

    @staticmethod
    def _slide(line, towardStart):
        """Return a new line with the non-zero cells packed to one end."""
        packed = [c for c in line if c != 0]
        padding = [0] * (4 - len(packed))
        return packed + padding if towardStart else padding + packed

    @staticmethod
    def _merge(line, towardStart):
        """Merge equal neighbours of an already packed *line* in place.

        Returns ``(line, points)`` where *points* is the sum of the merged
        tile values.
        """
        if towardStart:
            step, indices = 1, range(0, 3)
        else:
            step, indices = -1, range(3, 0, -1)
        pts = 0
        for i in indices:
            if line[i] != 0 and line[i] == line[i + step]:
                line[i] *= 2
                line[i + step] = 0
                pts += line[i]
        return line, pts

    def moveTileNumbers(self, tilenumbers, direction):
        """Apply one move to *tilenumbers* (modified in place).

        Returns ``(score, tilenumbers)`` where *score* is the sum of all
        tiles created by merging.  No new tile is added.
        """
        if direction in (self.LEFT, self.RIGHT):
            getter, setter = self.getLine, self.setLine
        elif direction in (self.UP, self.DOWN):
            getter, setter = self.getCol, self.setCol
        else:
            return 0, tilenumbers
        towardStart = direction in (self.UP, self.LEFT)

        score = 0
        for i in range(4):
            # pack, merge neighbours, then pack again to close the gaps
            line = self._slide(getter(tilenumbers, i), towardStart)
            line, pts = self._merge(line, towardStart)
            tilenumbers = setter(tilenumbers, i, self._slide(line, towardStart))
            score += pts

        return score, tilenumbers

# AI based on "term2048-AI"
# https://github.com/Nicola17/term2048-AI
class AI(object):
    def __init__(self, board):
        self.board = board

    def nextMove(self):
        tilenumbers = self.board.copyTileNumbers()
        m, s = self.nextMoveRecur(tilenumbers[:],3,3)
        return m

    def nextMoveRecur(self, tilenumbers, depth, maxDepth, base=0.9):
        bestMove, bestScore = 0, -1
        for m in range(1,5):
            if(self.board.validMove(tilenumbers, m)):
                score, newtiles = self.board.moveTileNumbers(tilenumbers[:], m)
                score, critical = self.evaluate(newtiles)
                newtiles = self.board.setCell(newtiles,critical[0],critical[1],2)
                if depth != 0:
                    my_m,my_s = self.nextMoveRecur(newtiles[:],depth-1,maxDepth)
                    score += my_s*pow(base,maxDepth-depth+1)
                if(score > bestScore):
                    bestMove = m
                    bestScore = score

        return bestMove, bestScore

    def evaluate(self, tilenumbers, commonRatio=0.25):

        maxVal = 0.
        criticalTile = (-1, -1)

        for i in range(8):
            linearWeightedVal = 0
            invert = False if i<4 else True
            weight = 1.
            ctile = (-1,-1)

            cond = i%4
            for y in range(4):
                for x in range(4):
                    if cond==0:
                        b_x = 4-1-x if invert else x
                        b_y = y
                    elif cond==1:
                        b_x = x
                        b_y = 4-1-y if invert else y
                    elif cond==2:
                        b_x = 4-1-x if invert else x
                        b_y = 4-1-y
                    elif cond==3:
                        b_x = 4-1-x
                        b_y = 4-1-y if invert else y

                    currVal=self.board.getCell(tilenumbers,b_x,b_y)
                    if(currVal == 0 and ctile == (-1,-1)):
                        ctile = (b_x,b_y)
                    linearWeightedVal += currVal*weight
                    weight *= commonRatio
                invert = not invert

            if linearWeightedVal > maxVal:
                maxVal = linearWeightedVal
                criticalTile = ctile

        return maxVal, criticalTile

    def solveBoard(self, moveinterval=500):
        boardHWND = self.board.getWindowHandle()
        if not boardHWND:
            return False
        bx, by, bw, bh = cv2.boundingRect(self.board.getBoardContour())
        x0, x1, y0, y1 = bx, bx+bw, by, by+bh

        win32gui.SetForegroundWindow(boardHWND)
        shell = win32com.client.Dispatch('WScript.Shell')
        print 'Set the focus to the Game Window, and the press this arrow key:'
        keymove = ['UP', 'DOWN', 'LEFT', 'RIGHT']

        delay = moveinterval / 3 # milliseconds delay to cancel board animation effect
        prev_numbers = []
        while True:
            numbers, inframe, outframe = self.board.update()
            if numbers != prev_numbers:
                cv2.waitKey(delay)
                numbers, inframe, outframe = self.board.update()
                if numbers == prev_numbers: # recheck if has changed
                    continue
                prev_numbers = numbers
                move = ai.nextMove()
                if move:
                    key = keymove[move-1]
                    shell.SendKeys('{%s}'%key)
                    print key
                    cv2.waitKey(delay)
                    cv2.imshow('CV copy',inframe[y0:y1,x0:x1])
                    cv2.imshow('CV out', outframe[y0:y1,x0:x1])
            cv2.waitKey(delay)
        cv2.destroyWindow( 'CV copy' )
        cv2.destroyWindow( 'CV out' )


# Game pages this script was written against:
# http://gabrielecirulli.github.io/2048/
# http://ov3y.github.io/2048-AI/
if __name__ == "__main__":
    # Guarded so that importing this file does not start grabbing windows.
    # The title is matched as a case-insensitive substring.
    board = Board("2048 - Google Chrome")
    #board = Board("2048 - Mozilla Firefox")

    ai = AI(board)
    ai.solveBoard(360)

    # Parenthesised single argument: same output on Python 2, valid on Python 3.
    print('stopped.')

我打开 Google Chrome 并访问示例网址 http://ov3y.github.io/2048-AI/,运行该脚本时出现以下错误:

20.py:109: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  if board==None:
no tile found!
Set the focus to the Game Window, and the press this arrow key:

然后就没有任何反应了,程序只是停在那里。所以我最关心的是 no tile found! 这条输出。取消注释下面这几行:

#cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
#cv2.imshow('board',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'board' )        

在屏幕上显示以下窗口:

enter image description here

有人能解释一下为什么 OpenCV 检测不到网格,或者应该如何调试这个问题吗?

1 个答案:

答案 0 :(得分:2)

很可能问题不在于网格检测,而在于浏览器窗口的捕获——如果是在一张空图像上查找网格,当然会失败。首先请确认你已经正确抓取到了 Firefox / Chrome / Opera 的窗口画面——在函数 getClientFrame(self) 中加入下面这段代码:

# Debug aid: show the captured frame and wait up to 10000 ms (10 seconds)
cv2.imshow('browser window', cvimage)
cv2.waitKey(10000)

把它放在最后的 return cvimage 之前。它应该会把浏览器窗口的画面显示 10 秒钟。如果没有显示,那么可以 100% 确定问题出在浏览器窗口的捕获上,而不是网格检测上。要检查捕获浏览器窗口时出了什么问题,可以使用 win32api.GetLastError() 函数(原文此处有一个指向错误代码列表的链接,可据此查询错误代码的含义)。

当然也有可能是我错了,问题确实出在网格检测上——如果是这样,请提供示例图片(把我给出的代码所显示的图片保存下来即可),这样我们就可以测试一下。

**编辑:**
我刚刚注意到你帖子的第二部分——所以我很可能是错的,不过无论如何你都可以测试一下。你似乎捕获到了 Chrome 窗口以及其他某个窗口的一部分——请试着把浏览器窗口设为全屏。

**编辑 2:**

仔细观察你的图像后,我发现了奇怪的地方——捕获到的图像带有垂直条纹,而且宽度(不算右侧重复的部分)比原始窗口小(但高度看起来没问题)。宽度似乎是原始宽度的 75%,所以我猜 PIL 把每 4 个字节当作一个像素,而实际上每个像素只应占用 3 个字节。我很难测试这一点,因为在我的系统(Win 8.1 64 位)上它运行良好。可能的解决方案(我无法测试它们,所以需要你自己检查哪一个有效,抱歉 :)):

  1. 尝试更改此行:

        pil_img = Image.frombuffer( 'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
    

    改成类似这样: pil_img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGR', 0, 1) 一般来说,你需要把第五个参数的值从 BGRX 改成别的——最有可能是 'BGR'(原文此处有一个指向完整选项列表的链接)。如果这样不行,请尝试第一个和第五个参数的其他取值。

  2. 从屏幕截图来看,你用的似乎是相当旧的 Windows 版本,或者至少用的是旧版 GUI 风格(这非常棒!)。如果——除了把 GUI 风格设置为“旧风格”之外——你(或者 Windows 替你)把颜色质量设置成了“最高(32 位)”以外的值,这也可能导致你的问题。请尝试将其设置为“最高(32 位)”。说明一下——我指的是下面这个窗口中的设置:
    enter image description here
    (在右侧,靠近底部和调色板)。
  3. 如果你有 2 个(或更多)屏幕,请在只使用一个屏幕的情况下再测试一次。此外,如果你正在使用某种替代的窗口管理器(或其他特殊的扩展,例如多桌面工具),请先将其关闭再试一次。