尝试破译主要变体(PV)时遇到问题。
"主要变体是从根节点到叶节点的路径,其中每个节点具有相同的值。此叶节点的值确定根的minimax值,称为主叶。"
PV(move,eval)下面的游戏演示显示了这一行:
4,0 7,0 6,0 5,0 2,1
这是一个有效的PV,因为并非所有eval节点具有相同的值?人工智能永远不会失败,但由于令人目不暇接的PV似乎是虚假的,它会在AI逻辑上投下阴影。 :(希望,它只是一个PV虫!
| | 0 | 1 | 2
---|---|--- ---|---|---
| | 3 | 4 | 5
---|---|--- ---|---|---
| | 6 | 7 | 8
Your move: 8
Thinking Cycles....: 2784300
Boards Generated...: 3956
Principal Variation: 4,0 7,0 6,0 5,0 2,1
Alpha-Beta Cutoffs.: 931
Computer Evaluation: 0
Computer Move......: 4
| | 0 | 1 | 2
---|---|--- ---|---|---
| X | 3 | 4 | 5
---|---|--- ---|---|---
| | O 6 | 7 | 8
Your move: 7
Thinking Cycles....: 410484
Boards Generated...: 575
Principal Variation: 6,0 5,0 2,1
Alpha-Beta Cutoffs.: 63
Computer Evaluation: 0
Computer Move......: 6
| | 0 | 1 | 2
---|---|--- ---|---|---
| X | 3 | 4 | 5
---|---|--- ---|---|---
X | O | O 6 | 7 | 8
Your move: 2
Thinking Cycles....: 42808
Boards Generated...: 45
Principal Variation: 5,0 3,0 1,0 0,0
Alpha-Beta Cutoffs.: 1
Computer Evaluation: 0
Computer Move......: 5
| | O 0 | 1 | 2
---|---|--- ---|---|---
| X | X 3 | 4 | 5
---|---|--- ---|---|---
X | O | O 6 | 7 | 8
Your move: 3
Thinking Cycles....: 6892
Boards Generated...: 4
Principal Variation: 0,0 1,0
Alpha-Beta Cutoffs.: 0
Computer Evaluation: 0
Computer Move......: 0
X | | O 0 | 1 | 2
---|---|--- ---|---|---
O | X | X 3 | 4 | 5
---|---|--- ---|---|---
X | O | O 6 | 7 | 8
Your move: 1
X | O | O 0 | 1 | 2
---|---|--- ---|---|---
O | X | X 3 | 4 | 5
---|---|--- ---|---|---
X | O | O 6 | 7 | 8
A draw! (*_*)
如果有人在我的代码中看到错误,请告诉我。感谢。
// Tic-Tac-Toe - Iterative implementation of alpha beta tree search.
// Built with Microsoft Visual Studio Professional 2013.
#include "stdafx.h"
#include <windows.h>
#include <intrin.h>
#include <stdint.h>
#define INFINITY 9999
#define NO_MOVE 9
#define NO_EVAL 2
#define X 1
#define O -1
#define Empty 0
struct values
{
int nodeMove;
int nodeEval;
int alpha;
int beta;
int player;
int board[9];
};
struct line
{
int nodeMove;
int nodeEval;
};
struct values moves[9];
int bestMove, bestEval;
int nodesCreated;
int abCutoffs;
int pvDepth, pvBestDepth;
// The principal variation pv[9] is a path from the root to a leaf node, in which every node
// has the same value. This leaf node, whose value determines the minimax value of the root,
// is called the principal leaf.
struct line pv[9] = {
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};
struct line bestPV[9] = {
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
{ NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};
int board_eval(int *b)
{
// Rows.
if (b[0] && b[0] == b[1] && b[1] == b[2]) return b[0];
if (b[3] && b[3] == b[4] && b[4] == b[5]) return b[3];
if (b[6] && b[6] == b[7] && b[7] == b[8]) return b[6];
// Cols.
if (b[0] && b[0] == b[3] && b[3] == b[6]) return b[0];
if (b[1] && b[1] == b[4] && b[4] == b[7]) return b[1];
if (b[2] && b[2] == b[5] && b[5] == b[8]) return b[2];
// Center is empty.
if (!b[4]) return 0;
// Diags.
if (b[0] == b[4] && b[4] == b[8]) return b[0];
if (b[2] == b[4] && b[4] == b[6]) return b[2];
return 0;
}
void displayboard(int depth)
{
const char *t = "O X";
printf("\n\t %c | %c | %c\t\t 0 | 1 | 2\n", t[moves[depth].board[0] + 1], t[moves[depth].board[1] + 1], t[moves[depth].board[2] + 1]);
printf("\t---|---|---\t\t---|---|---\n");
printf("\t %c | %c | %c\t\t 3 | 4 | 5\n", t[moves[depth].board[3] + 1], t[moves[depth].board[4] + 1], t[moves[depth].board[5] + 1]);
printf("\t---|---|---\t\t---|---|---\n");
printf("\t %c | %c | %c\t\t 6 | 7 | 8\n\n", t[moves[depth].board[6] + 1], t[moves[depth].board[7] + 1], t[moves[depth].board[8] + 1]);
}
int find_move(int *board_arr, int nodeMove)
{
int i;
// Speedup loop using nodeMove instead of 0.
for (i = nodeMove; i < 9; i++) {
if (board_arr[i] == Empty)
return i;
}
return NO_MOVE;
}
int move_up_tree(int depth)
{
depth--;
if (depth == 0 && (moves[depth + 1].nodeEval > moves[depth].nodeEval))
{
bestMove = moves[depth].nodeMove;
bestEval = moves[depth + 1].nodeEval;
pvBestDepth = pvDepth;
pv[depth] = { bestMove, bestEval };
for (int i = 0; i < pvDepth; ++i)
{
bestPV[i].nodeMove = pv[i].nodeMove;
bestPV[i].nodeEval = pv[i].nodeEval;
pv[i] = { NO_MOVE, NO_EVAL };
}
}
if (moves[depth].player == X)
{
moves[depth].nodeEval = max(moves[depth].nodeEval, moves[depth + 1].nodeEval);
moves[depth].alpha = max(moves[depth].alpha, moves[depth].nodeEval);
}
else
{
moves[depth].nodeEval = min(moves[depth].nodeEval, moves[depth + 1].nodeEval);
moves[depth].beta = min(moves[depth].beta, moves[depth].nodeEval);
}
pv[depth] = { moves[depth].nodeMove, moves[depth].nodeEval };
moves[depth].nodeMove++;
moves[depth].nodeMove = find_move(moves[depth].board, moves[depth].nodeMove);
return depth;
}
int move_down_tree(int depth)
{
int eval;
depth++;
moves[depth] = moves[depth - 1];
nodesCreated++;
if (moves[depth].player == X)
{
moves[depth].board[moves[depth].nodeMove] = X;
moves[depth].player = O;
moves[depth].nodeEval = INFINITY;
}
else
{
moves[depth].board[moves[depth].nodeMove] = O;
moves[depth].player = X;
moves[depth].nodeEval = -INFINITY;
}
eval = board_eval(moves[depth].board);
// Leaf node.
if (eval || find_move(moves[depth].board, 0) == NO_MOVE)
{
moves[depth].nodeEval = eval;
moves[depth].nodeMove = NO_MOVE;
pvDepth = depth;
}
else
{
moves[depth].nodeMove = find_move(moves[depth].board, 0);
}
return depth;
}
void computer_move()
{
int depth = 0;
uint64_t c1, c2;
nodesCreated = 0;
abCutoffs = 0;
bestMove = NO_MOVE;
bestEval = -INFINITY;
moves[0].nodeMove = find_move(moves[0].board, 0);
moves[0].nodeEval = -INFINITY;
moves[0].alpha = -INFINITY;
moves[0].beta = INFINITY;
moves[0].player = X;
if (moves[0].nodeMove != NO_MOVE)
{
c1 = __rdtsc();
while (TRUE)
{
if (moves[depth].nodeMove == NO_MOVE)
{
if (depth == 0) break;
depth = move_up_tree(depth);
}
else if (moves[depth].alpha >= moves[depth].beta)
{
abCutoffs++;
moves[depth].nodeMove = NO_MOVE;
}
else
{
depth = move_down_tree(depth);
}
}
c2 = __rdtsc();
moves[0].board[bestMove] = X;
printf("\n");
printf("Thinking Cycles....: %d\n", c2 - c1);
printf("Boards Generated...: %d\n", nodesCreated);
printf("Principal Variation: ");
for (int i = 0; i < pvBestDepth; ++i) printf("%d,%d ", bestPV[i].nodeMove,bestPV[i].nodeEval);
printf("\n");
printf("Alpha-Beta Cutoffs.: %d\n", abCutoffs);
printf("Computer Evaluation: %d\n", bestEval);
printf("Computer Move......: %d\n", bestMove);
}
}
void init_board()
{
moves[0].board[0] = Empty;
moves[0].board[1] = Empty;
moves[0].board[2] = Empty;
moves[0].board[3] = Empty;
moves[0].board[4] = Empty;
moves[0].board[5] = Empty;
moves[0].board[6] = Empty;
moves[0].board[7] = Empty;
moves[0].board[8] = Empty;
}
void human_move()
{
int move;
char *p, s[100];
printf("Your move: ");
while (fgets(s, sizeof(s), stdin)) {
move = strtol(s, &p, 10);
if (p == s || *p != '\n') {
printf("Your move: ");
}
else break;
}
moves[0].board[move] = O;
}
int main(int argc, char **argv)
{
init_board();
displayboard(0);
while (1)
{
human_move();
computer_move();
displayboard(0);
if (board_eval(moves[0].board))
{
printf("Computer Wins! (-_-)\n");
init_board();
displayboard(0);
}
else if (find_move(moves[0].board, 0) == NO_MOVE)
{
printf("A draw! (*_*)\n");
init_board();
displayboard(0);
}
}
return 0;
}