连接4 Minimax AI无法正常工作

时间:2014-12-30 01:02:58

标签: python minimax alpha-beta-pruning

我最近尝试为connect 4实现minimax算法,为它创建一个机器人,以保持简单(可能使事情复杂化)我使用Python。

这是我到目前为止所做的:

# Connect 4 Bot
# Author: Angus Moore (angusmoore73@gmail.com)

import os
import copy
import random

# GLOBAL VARS
height  = 6
blank   = "o"
player1 = "i"
player2 = "-"
currentTurn = 1

class Grid:

    def __init__( self, width = 7 ):
        self.width  = width
        self.grid       = []
        for x in range(0, width):
            # CREATE A TEMP COLUMN TO HOLD THE VALUES BEFORE ADDING THEM TO THE GRID
            tempCol = []
            for y in range(0, height):
                tempCol.append(blank)
            self.grid.append(tempCol)

    def p( self, state, y, x ):
        # CHECK IF THE POINT IS INSIDE THE GRID
        if (y<0 or x<0 or x>=self.width or y>=height): return 0
        else: return state[x][y]

    def checkWinner( self ):

        state = self.grid

        # CHECK FOR VERTICAL WINS
        for x in range( 0, self.width ):
            for y in range( 0, height ):
                if (self.p(state,y,x)!=blank and self.p(state,y,x)==self.p(state,y+1,x) and self.p(state,y,x)==self.p(state,y+2,x) and self.p(state,y,x)==self.p(state,y+3,x)):
                    return self.p(state,y,x)

        # CHECK FOR HORIZONTAL WINS
        for x in range( 0, self.width ):
            for y in range( 0, height ):
                if (self.p(state,y,x)!=blank and self.p(state,y,x)==self.p(state,y,x+1) and self.p(state,y,x)==self.p(state,y,x+2) and self.p(state,y,x)==self.p(state,y,x+3)):
                    return self.p(state,y,x)

        # CHECK FOR DIAGONAL WINS
        for x in range( 0, self.width ):
            for y in range( 0, height ):
                for d in range( -1, 2, 2 ):
                    if (self.p(state,y,x)!=blank and self.p(state,y,x)==self.p(state,y+1,x+1*d) and self.p(state,y,x)==self.p(state,y+2,x+2*d) and self.p(state,y,x)==self.p(state,y+3,x+3*d)):
                        return self.p(state,y,x)

        # NO WIN
        for y in range( 0, height ):
            for x in range( 0, self.width ):
                if (self.p(state,x,y)==blank):
                    return 0

        # TIE (IMPOSSIBLE)
        return -1

    def displayGrid( self ):
        # CLEAR THE CONSOLE FIRST
        os.system('cls')
        for y in range( 0, height ):
            for x in range( 0, self.width ):
                # PUT A PIPE IN BETWEEN ALL COLUMNS (AVOID THE RIGHT OF THE FINAL COLUMN)
                if ( x != len(self.grid) - 1 ):
                    print( self.grid[x][y], end="|" )
                else:
                    print( self.grid[x][y], end="\n" )

    def insert( self, col, token ):

        global currentTurn

        temp = [x[:] for x in self.grid]

        if ( currentTurn == 1 ): currentTurn = 2
        else: currentTurn = 1

        # LOOP THROUGH THE COLUMN (BOTTOM TO TOP)
        for x in range( 0, height ):
            # CHECK IF THE CURRENT POSITION IS BLANK
            if ( temp[ col ][ height - x - 1 ] == blank ):
                temp[ col ][ height - x - 1 ] = token
                return temp
            else: pass

    def isLegal( self, col, state ):
        for i in range( 0, height ):
            if state[col][i] == blank:
                return True
        return False

    def bestMove( self, depth, currState, currPlayer ):

        state = [x[:] for x in currState]

        if ( currPlayer == player1 ): oppPlayer = player2
        else: oppPlayer = player1

        legalMoves = { }

        for col in range( 0, self.width ):
            if self.isLegal( col, state ):
                temp = self.insert( col, currPlayer )
                legalMoves[col] = -self.search( depth - 1, temp, oppPlayer )
        bestAlpha = -999999999
        bestMove = None
        moves = list(legalMoves.items())
        random.shuffle(moves)

        for move, alpha in moves:
            if alpha >= bestAlpha:
                bestAlpha = alpha
                bestMove = move

        return bestMove, bestAlpha

    def search( self, depth, state, currPlayer ):

        legalMoves = [ ]

        for i in range( 0, self.width ):
            if self.isLegal( col, state ):
                temp = self.insert( i, currPlayer )
                legalMoves.append(temp)

        if depth == 0 or len( legalMoves ) == 0 or self.checkWinner() != 0:
            return self.value( state, currPlayer )

        if ( currPlayer == player1 ): oppPlayer = player2
        else: oppPlayer = player1

        alpha = -999999999
        for child in legalMoves:
            if child == None:
                print("No Children!")
            else:
                alpha = max( alpha, -self.search( depth - 1, child, oppPlayer ) )
        return alpha

    def value( self, state, player ):

        if ( player == player1 ): oppPlayer = player2
        else: oppPlayer = player1

        myFours = self.checkStreak( state, player, 4 )
        myThrees    = self.checkStreak( state, player, 3 )
        myTwos  = self.checkStreak( state, player, 2 )
        oppFours    = self.checkStreak( state, oppPlayer, 4 )

        if oppFours > 0: return -100000
        else: return myFours * 100000 + myThrees * 100 + myTwos

    def checkStreak( self, state, player, streak ):
        count = 0
        for y in range( 0, height ):
            for x in range( 0, self.width ):
                if (self.p(state,y,x)==player and self.p(state,y,x)==self.p(state,y+1,x) and self.p(state,y,x)==self.p(state,y+2,x) and self.p(state,y,x)==self.p(state,y+3,x)):
                    count += 1
                if (self.p(state,y,x)==player and self.p(state,y,x)==self.p(state,y,x+1) and self.p(state,y,x)==self.p(state,y,x+2) and self.p(state,y,x)==self.p(state,y,x+3)):
                    count += 1
                for d in range( -1, 2, 2 ):
                    if (self.p(state,y,x)==player and self.p(state,y,x)==self.p(state,y+1,x+1*d) and self.p(state,y,x)==self.p(state,y+2,x+2*d) and self.p(state,y,x)==self.p(state,y+3,x+3*d)):
                        count += 1
        return count

if ( __name__ == '__main__' ):

    # CREATE A NEW GRID
    grid = Grid()

    # GAMELOOP
    while True:

        # DISPLAY THE GRID
        grid.displayGrid()

        # CHECK FOR A WINNER
        if ( grid.checkWinner() == player1 ):
            print("Player 1 wins!!")
            break
        elif ( grid.checkWinner() == player2 ):
            print("Player 2 wins!!")
            break

        # CHANGE PLAYER EVERY LOOP
        if ( currentTurn == 1 ):
            col = int(input( 'Player 1 - Which column: ' )) - 1
            grid.grid = grid.insert( col, player1 )
        elif ( currentTurn == 2 ):
            # col = int(input( 'Player 2 - Which column:' )) - 1
            # grid.insert( col, player2 )
            #print(grid.bestMove(5, grid.grid, player2))
            #input()
            grid.grid = grid.insert( grid.bestMove( 3, grid.grid, player2 )[0], player2 )

有时是有效的。我认为它没有正确评估每一个动作。我已经玩了一段时间了,似乎无法找到它出错的地方。

非常感谢任何帮助,谢谢。

0 个答案:

没有答案