我正在尝试使用alpha-beta pruning为Checkers游戏编写算法(AI vs AI)。你可以看到代码&评论 在PasteBin下方或其中。
游戏本身运行良好,但AI(alpha-beta修剪算法)似乎有错误,因为机器人基本上互相喂食棋子(根本没有计算显示)。该代码包含2个不同版本的alpha-beta算法函数(更详细,更不详细)。
我已尝试在tmp
中跟踪alphabeta()
的值,并且它似乎有正常值(如果深度= 5,则范围从-3到3)。
我也尝试过将this代码实现到我的代码中,但结果却相同。
我最好的猜测是问题出现在bool whiteTurn
中,它现在声明轮到现在,但我找不到任何问题 - 转弯正确切换。
第二个最佳猜测 - Move bestMove
。我不确定将它从递归函数中删除是否正确。
错误是什么?
#include "stdafx.h"
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
class Move
{
public:
pair<int, int> start;
pair<int, int> end;
bool lethal;
Move(int x, int y, int x1, int y1, bool kill)
{
start.first = x; start.second = y;
end.first = x1; end.second = y1;
lethal = kill;
}
};
char **initBoard(int size)
{
char **board = new char*[size];
for (int count = 0; count < size; count++)
board[count] = new char[size];
return board;
}
void newGame(char **board, int size)
{
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
{
board[i][j] = '-';
if ((i == 0 || i == 2) && j % 2 == 1) board[i][j] = 'O';
if (i == 1 && j % 2 == 0) board[i][j] = 'O';
if ((i == size - 3 || i == size - 1) && j % 2 == 0) board[i][j] = 'X';
if (i == size - 2 && j % 2 == 1) board[i][j] = 'X';
}
}
void printBoard(char **board, int size)
{
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout << board[i][j] << " ";
}
cout << endl;
}
cout << endl;
}
void do_move(char **board, Move play)
{
char temp = board[play.start.first][play.start.second];
board[play.start.first][play.start.second] = board[play.end.first][play.end.second];
board[play.end.first][play.end.second] = temp;
if (play.lethal)
board[(play.end.first + play.start.first) / 2][(play.end.second + play.start.second) / 2] = '-';
}
void undo_move(char **board, Move play)
{
board[play.start.first][play.start.second] = board[play.end.first][play.end.second];
board[play.end.first][play.end.second] = '-';
if (play.lethal)
{
if (board[play.start.first][play.start.second] == 'X')
board[(play.end.first + play.start.first) / 2][(play.end.second + play.start.second) / 2] = 'O';
if (board[play.start.first][play.start.second] == 'O')
board[(play.end.first + play.start.first) / 2][(play.end.second + play.start.second) / 2] = 'X';
}
}
vector<Move> findMoves(char **board, int size, bool whiteTurn)
{
vector<Move> moves;
//first jump (if possible)
for (int x = 0; x < size; x++)
{
for (int y = 0; y < size; y++)
{
if (whiteTurn && board[x][y] == 'X')
{
if (x > 1 && y > 1 && board[x - 1][y - 1] == 'O' && board[x - 2][y - 2] == '-')
moves.push_back(Move(x, y, x - 2, y - 2, true));
if (x > 1 && y < size - 2 && board[x - 1][y + 1] == 'O' && board[x - 2][y + 2] == '-')
moves.push_back(Move(x, y, x - 2, y + 2, true));
if (x < size - 2 && y > 1 && board[x + 1][y - 1] == 'O' && board[x + 2][y - 2] == '-')
moves.push_back(Move(x, y, x + 2, y - 2, true));
if (x < size - 2 && y < size - 2 && board[x + 1][y + 1] == 'O' && board[x + 2][y + 2] == '-')
moves.push_back(Move(x, y, x + 2, y + 2, true));
}
if (!whiteTurn && board[x][y] == 'O')
{
if (x > 1 && y > 1 && board[x - 1][y - 1] == 'X' && board[x - 2][y - 2] == '-')
moves.push_back(Move(x, y, x - 2, y - 2, true));
if (x > 1 && y < size - 2 && board[x - 1][y + 1] == 'X' && board[x - 2][y + 2] == '-')
moves.push_back(Move(x, y, x - 2, y + 2, true));
if (x < size - 2 && y > 1 && board[x + 1][y - 1] == 'X' && board[x + 2][y - 2] == '-')
moves.push_back(Move(x, y, x + 2, y - 2, true));
if (x < size - 2 && y < size - 2 && board[x + 1][y + 1] == 'X' && board[x + 2][y + 2] == '-')
moves.push_back(Move(x, y, x + 2, y + 2, true));
}
}
}
//then move
for (int x = 0; x < size; x++)
{
for (int y = 0; y < size; y++)
{
if (whiteTurn && board[x][y] == 'X')
{
if (x > 0 && y > 0 && board[x - 1][y - 1] == '-')
moves.push_back(Move(x, y, x - 1, y - 1, false));
if (x > 0 && y < size - 1 && board[x - 1][y + 1] == '-')
moves.push_back(Move(x, y, x - 1, y + 1, false));
}
if (!whiteTurn && board[x][y] == 'O')
{
if (x < size - 1 && y > 0 && board[x + 1][y - 1] == '-')
moves.push_back(Move(x, y, x + 1, y - 1, false));
if (x < size - 1 && y < size - 1 && board[x + 1][y + 1] == '-')
moves.push_back(Move(x, y, x + 1, y + 1, false));
}
}
}
return moves;
}
//plain score calculation function
int getScore(char **board, int size, bool whiteTurn)
{
int whiteNum = 0, blackNum = 0;
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
if (board[i][j] == 'X') whiteNum++;
if (board[i][j] == 'O') blackNum++;
}
}
if (whiteTurn)
return whiteNum - blackNum;
else
return blackNum - whiteNum;
}
//old function, doesnt work as intended too
/*Move getBestMove(char **board, int size, bool whiteTurn)
{
int score, tmp;
Move bestMove(0, 0, 0, 0, false);
vector<Move> movelist = findMoves(board, size, whiteTurn);
score = getScore(board, size, whiteTurn);
for (unsigned int i = 0; i < movelist.size(); i++)
{
do_move(board, movelist.at(i));
tmp = getScore(board, size, whiteTurn);
undo_move(board, movelist.at(i));
if (tmp >= score)
{
score = tmp;
bestMove = movelist.at(i);
}
}
return bestMove;
}*/
//made this global - no idea how to avoid it being global with recursion in alphabeta
Move bestMove(0, 0, 0, 0, false);
//alphabeta function with more detailed calculations
/*int AlphaBeta(char **board, int size, bool whiteTurn, int depth, int alpha, int beta)
{
if (depth == 0) return getScore(board, size, whiteTurn);
int score = -100;
vector<Move> movelist = findMoves(board, size, whiteTurn);
for (unsigned int i = 0; i < movelist.size(); i++)
{
do_move(board, movelist.at(i));
int tmp = -AlphaBeta(board, size, !whiteTurn, depth - 1, alpha, beta);
undo_move(board, movelist.at(i));
if (tmp > score)
{
if (whiteTurn)
{
if (score > alpha)
{
alpha = score;
}
if (-alpha <= beta)
{
return alpha;
}
}
else
{
if (score > beta)
{
beta = score;
}
if (-beta <= alpha)
{
return beta;
}
}
}
}
return score;
}*/
//generic alphabeta function
int alphabeta(char **board, int size, bool whiteTurn, int depth, int alpha, int beta)
{
if (depth == 0) return getScore(board, size, whiteTurn);
vector<Move> movelist = findMoves(board, size, whiteTurn);
for (const Move &move : movelist)
{
do_move(board, move);
int tmp = -alphabeta(board, size, !whiteTurn, depth - 1, -beta, -alpha);
undo_move(board, move);
if (tmp > alpha)
{
if (depth == 5)
bestMove = move;
alpha = tmp;
}
}
return alpha;
}
//main game loop
void game(char **board, int size, bool &whiteTurn)
{
newGame(board, size);
printBoard(board, size);
system("PAUSE");
int a = -std::numeric_limits<int>::max();
int b = std::numeric_limits<int>::max();
do
{
alphabeta(board, size, whiteTurn, 5, a, b);
do_move(board, bestMove);
whiteTurn = !whiteTurn;
system("cls");
printBoard(board, size);
system("PAUSE");
} while (!findMoves(board, size, whiteTurn).empty());
}
int main()
{
int n = 8;
bool whTurn = true;
char **board=initBoard(n);
game(board, n, whTurn);
return 0;
}
答案 0 :(得分:0)
文献中通常描述的alpha-beta临界值的方式是 不必要地令人费解。您不需要两个限制,也不是一个好主意 从同一个玩家的角度评估移动。这是更多 清晰的说明:
for all moves: evaluate the move give it a temporary score from the point of view of the player at move for all counter moves: evaluate the move give is a temporary score from the point of view of the player at move for all counter-counter moves: evaluate the move give it a score from the point of view of the player at move undo the counter-counter move update best counter-counter move if better if the counter-counter move is so good, that current counter move cant be best, (other is better) then break subtract best counter-counter-move score from temporary counter score undo the counter move update best counter move if better if the counter move is so good, that current move cant be best, (other is better) then break subtract best counter-move score from temporary score undo the move update best move if better
逻辑是这样的:
假设您已经评估了一些动作,到目前为止最好的动作值得3
当前移动的临时得分是5。
您当前评估的反向移动价值4。
这意味着当前的举动最多值1。(5-4)
由于当前举动无法达到最佳状态,因此无需找到更好的反击举动