我实现了negamax,当尝试实现alpha beta版本时,结果是不同的。据我了解,无论节点顺序如何,它们都应该相同。
我的评估函数从玩家的角度返回局面的平衡值;如果游戏已经结束,则获胜时返回MAX,否则返回MIN。我的初始参数是alpha = MIN,beta = MAX。
def negamax(self, player, board, depth):
    """Search the game tree with plain negamax and pick a move for ``player``.

    Args:
        player: The side to move at ``board``.
        board: Position to search. It is never mutated; every move is
            applied to a deep copy.
        depth: Remaining search depth in plies.

    Returns:
        A ``(value, move)`` tuple. ``value`` is the score from ``player``'s
        point of view. ``move`` is the first legal move that reaches that
        score, or ``None`` when the position is a leaf (depth exhausted,
        game over, or ``player`` has no legal move).
    """
    if depth == 0 or board.end_of_game():
        return self.evaluate(player, board), None

    moves = board.legal_moves(player)
    if not moves:  # Current player has no move
        return self.evaluate(player, board), None

    opponent = board.opponent(player)
    value = self.MIN
    best_move = moves[0]
    for move in moves:
        child = deepcopy(board)
        child.make_move(move, player)
        # The child's score is from the opponent's perspective, so negate it.
        score = -self.negamax(opponent, child, depth - 1)[0]
        # Compare scores only. Comparing (score, move) tuples would break
        # ties by ordering the moves themselves, which is arbitrary and
        # raises TypeError for moves that do not support "<".
        if score > value:
            value, best_move = score, move
    return value, best_move
def negamax_AB(self, player, board, depth, alpha, beta):
    """Search the game tree with negamax plus alpha-beta pruning.

    Args:
        player: The side to move at ``board``.
        board: Position to search. It is never mutated; every move is
            applied to a deep copy.
        depth: Remaining search depth in plies.
        alpha: Lower bound of the search window, from ``player``'s view.
        beta: Upper bound of the search window, from ``player``'s view.

    Returns:
        A ``(value, move)`` tuple. ``move`` is ``None`` at a leaf (depth
        exhausted, game over, or ``player`` has no legal move). When the
        true score lies outside ``(alpha, beta)``, ``value`` is only a
        bound on it, not the exact score.

    NOTE(review): negating the window for the child assumes the score range
    is symmetric (``self.MIN == -self.MAX``) -- confirm where those
    constants are defined.
    """
    if depth == 0 or board.end_of_game():
        return self.evaluate(player, board), None

    moves = board.legal_moves(player)
    if not moves:  # Current player has no move
        return self.evaluate(player, board), None

    opponent = board.opponent(player)
    value = self.MIN
    best_move = moves[0]
    for move in moves:
        child = deepcopy(board)
        child.make_move(move, player)
        # The child sees the window negated and swapped, and its score is
        # from the opponent's perspective, so negate the result.
        score = -self.negamax_AB(opponent, child, depth - 1, -beta, -alpha)[0]
        # Strict comparison is essential here. A child searched with a
        # narrowed window may return only a bound that equals the current
        # best score; breaking that tie by comparing the moves (as tuple
        # max() would) can pick a move that is actually worse.
        if score > value:
            value, best_move = score, move
        alpha = max(alpha, value)
        if alpha >= beta:
            # The opponent already has a better alternative elsewhere.
            break
    return value, best_move