我正在尝试制作一个井字游戏AI,它使用minimax算法来以最优方式下棋。我注意到它并没有走出最优的着法,而且让它与自己对弈时总是'X'玩家获胜(双方都最优时结果应该是平局)。这是我的算法代码:
def _empty_cells(state):
    """Return the 1-based cell numbers (1..9, row-major) that are still blank."""
    return [
        row * 3 + (col + 1)
        for row in range(3)
        for col in range(3)
        if state[row][col] == ' '  # equality, not identity: `is` on str is unreliable
    ]


def _minimax_score(state, player):
    """Return the minimax value of ``state`` with ``player`` about to move.

    The value is from the point of view of 'O' (the AI): 1 if 'O' wins,
    -1 if 'X' wins and 0 for a draw, assuming both sides play optimally.
    """
    winner_loser, done = check_current_state(state)
    if done == "Done" and winner_loser == 'O':  # AI won
        return 1
    if done == "Done" and winner_loser == 'X':  # Human won
        return -1
    if done == "Draw":
        return 0

    opponent = 'X' if player == 'O' else 'O'
    scores = []
    for cell in _empty_cells(state):
        new_state = copy_game_state(state)  # never mutate the caller's board
        play_move(new_state, player, cell)
        scores.append(_minimax_score(new_state, opponent))

    if not scores:
        # No winner reported and nothing left to play: treat it as a draw.
        return 0
    # 'O' maximises the outcome, 'X' minimises it.
    return max(scores) if player == 'O' else min(scores)


def getBestMove(state, player):
    """Pick the optimal move for ``player`` using the minimax algorithm.

    The search must propagate *scores* up the game tree and only translate
    the best score into a cell number at the root. Returning the cell number
    from the recursive calls makes the search compare cell numbers instead of
    outcomes, which yields non-optimal play.

    Args:
        state: 3x3 board, indexable as ``state[row][col]``, with ``' '``
            marking an empty cell.
        player: ``'O'`` (the AI, maximising) or ``'X'`` (the human,
            minimising); any value other than ``'O'`` is treated as the
            minimising side.

    Returns:
        The 1-based cell number (1..9, row-major) of the best move, or
        ``None`` when the game is already over or no cell is empty.
    """
    _, done = check_current_state(state)
    if done in ("Done", "Draw"):
        return None

    maximizing = player == 'O'
    opponent = 'X' if maximizing else 'O'

    best_move = None
    best_score = None
    for cell in _empty_cells(state):
        new_state = copy_game_state(state)
        play_move(new_state, player, cell)
        score = _minimax_score(new_state, opponent)
        # Strict comparison keeps the first of several equally good moves.
        if (
            best_score is None
            or (maximizing and score > best_score)
            or (not maximizing and score < best_score)
        ):
            best_score = score
            best_move = cell
    return best_move
在这里我该怎么办?
答案 0 :(得分:0)
我认为,如果遵循标准的minimax算法(例如维基百科上的伪代码),实现起来会更容易。我也建议加入alpha-beta剪枝以提高速度,尽管在井字游戏中这并不是必需的。下面是我很久以前写的一个游戏中的示例,可供参考;它基本上全部取自所链接的维基百科页面,只做了一些小调整。调用方式例如:move, evaluation = minimax(board, 8, -math.inf, math.inf, True)
def minimax(board, depth, alpha, beta, maximizing_player):
    """Search the game tree with minimax and alpha-beta pruning.

    Args:
        board: game board object; this function uses its ``is_winner()``,
            ``is_board_full()``, ``get_possible_moves()`` methods and its
            ``board`` grid of cells with a ``player`` attribute.
        depth: remaining number of plies to search.
        alpha: best score the maximising side is already guaranteed.
        beta: best score the minimising side is already guaranteed.
        maximizing_player: True when 'O' is to move, False when 'X' is.

    Returns:
        A ``(best_move, score)`` tuple. ``best_move`` is one element of
        ``board.get_possible_moves(board)`` (indexed here as
        ``move[0]``/``move[1]``), or ``None`` when the position is not
        expanded (depth exhausted, game over, or no move available).
    """
    if depth == 0 or board.is_winner() or board.is_board_full():
        return None, evaluate(board)

    children = board.get_possible_moves(board)
    if not children:
        # Without this guard `children[0]` below raises IndexError when the
        # move generator returns nothing for a position not caught above.
        return None, evaluate(board)
    best_move = children[0]

    if maximizing_player:
        max_eval = -math.inf
        for child in children:
            # Work on a copy so the caller's board is never modified.
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'O'
            current_eval = minimax(board_copy, depth - 1, alpha, beta, False)[1]
            if current_eval > max_eval:
                max_eval = current_eval
                best_move = child
            alpha = max(alpha, current_eval)
            if beta <= alpha:
                # The minimiser already has a better option elsewhere.
                break
        return best_move, max_eval

    min_eval = math.inf
    for child in children:
        board_copy = copy.deepcopy(board)
        board_copy.board[child[0]][child[1]].player = 'X'
        current_eval = minimax(board_copy, depth - 1, alpha, beta, True)[1]
        if current_eval < min_eval:
            min_eval = current_eval
            best_move = child
        beta = min(beta, current_eval)
        if beta <= alpha:
            # The maximiser already has a better option elsewhere.
            break
    return best_move, min_eval
def evaluate(board):
    """Score a position: -1 if 'X' has won, 1 if 'O' has won, otherwise 0.

    'X' is checked first, so it takes precedence if both checks would pass.
    """
    for mark, score in (('X', -1), ('O', 1)):
        if board.is_winner(mark):
            return score
    return 0
进行alpha-beta修剪:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#define MAXLINE 1024
/* Message record handed to msgsnd(): a long type tag followed by the text. */
struct my_msgbuf {
long mtype; /* message type tag; main() sets it to 1 before sending */
char mtext[MAXLINE]; /* payload: one input line, filled by fgets() in main() */
};
/*
 * Read lines from stdin and send each one as a message on a System V
 * message queue, then remove the queue once stdin reaches end-of-file.
 *
 * Returns 0 on success; exits with status 1 if the key cannot be derived,
 * the queue cannot be opened/created, or the queue cannot be removed.
 */
int main(void)
{
    struct my_msgbuf buf;
    int msqid;
    key_t key;

    /* Derive the queue key from an existing file; "client.c" must exist. */
    if ((key = ftok("client.c", 'B')) == -1) {
        perror("ftok");
        exit(1);
    }

    /* Open the queue, creating it with mode 0644 if it does not exist. */
    if ((msqid = msgget(key, 0644 | IPC_CREAT)) == -1) {
        perror("msgget");
        exit(1);
    }

    printf("Write a text:\n");
    buf.mtype = 1;

    while (fgets(buf.mtext, MAXLINE, stdin) != NULL) {
        /*
         * msgsnd()'s size argument counts only the payload that follows
         * mtype. Passing sizeof(buf) would make the kernel copy
         * sizeof(long) bytes from beyond the end of buf. Send just the
         * text plus its terminating NUL.
         */
        size_t len = strlen(buf.mtext) + 1;

        if (msgsnd(msqid, &buf, len, 0) == -1)
            perror("msgsnd"); /* report and keep reading further lines */
    }

    /* Input finished: remove the queue from the system. */
    if (msgctl(msqid, IPC_RMID, NULL) == -1) {
        perror("msgctl");
        exit(1);
    }

    return 0;
}
请注意,对棋盘进行深拷贝(或者在递归调用minimax之后撤销刚才的落子)非常重要,否则您会改变原始棋盘的状态,并得到一些奇怪的行为。