Tic-Tac-Toe - alpha beta树搜索的迭代实现

时间:2015-10-06 15:00:35

标签: alpha-beta-pruning

尝试破译主要变体(PV)时遇到问题。

"主要变体是从根节点到叶节点的路径,其中每个节点具有相同的值。此叶节点的值确定根的minimax值,称为主叶。"

PV(move,eval)下面的游戏演示显示了这一行:

4,0    7,0    6,0    5,0    2,1

这是一个有效的PV,因为并非所有eval节点具有相同的值?人工智能永远不会失败,但由于令人目不暇接的PV似乎是虚假的,它会在AI逻辑上投下阴影。 :(希望,它只是一个PV虫!

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           |   |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   |                 6 | 7 | 8

Your move: 8

Thinking Cycles....: 2784300
Boards Generated...: 3956
Principal Variation: 4,0  7,0  6,0  5,0  2,1
Alpha-Beta Cutoffs.: 931
Computer Evaluation: 0
Computer Move......: 4

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   | O               6 | 7 | 8

Your move: 7

Thinking Cycles....: 410484
Boards Generated...: 575
Principal Variation: 6,0  5,0  2,1
Alpha-Beta Cutoffs.: 63
Computer Evaluation: 0
Computer Move......: 6

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 2

Thinking Cycles....: 42808
Boards Generated...: 45
Principal Variation: 5,0  3,0  1,0  0,0
Alpha-Beta Cutoffs.: 1
Computer Evaluation: 0
Computer Move......: 5

           |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
           | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 3

Thinking Cycles....: 6892
Boards Generated...: 4
Principal Variation: 0,0  1,0
Alpha-Beta Cutoffs.: 0
Computer Evaluation: 0
Computer Move......: 0

         X |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 1

         X | O | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

A draw! (*_*)

如果有人在我的代码中看到错误,请告诉我。感谢。

// Tic-Tac-Toe - Iterative implementation of alpha beta tree search.
// Built with Microsoft Visual Studio Professional 2013.

#include "stdafx.h"
#include <windows.h>
#include <intrin.h>
#include <stdint.h>


#define INFINITY 9999
#define NO_MOVE 9
#define NO_EVAL 2
#define X 1
#define O -1
#define Empty 0


struct values
{
    int nodeMove;
    int nodeEval;
    int alpha;
    int beta;
    int player;
    int board[9];
};


struct line
{
    int nodeMove;
    int nodeEval;
};

struct values moves[9];


int bestMove, bestEval;
int nodesCreated;
int abCutoffs;
int pvDepth, pvBestDepth;


// The principal variation pv[9] is a path from the root to a leaf node, in which every node
// has the same value. This leaf node, whose value determines the minimax value of the root,
// is called the principal leaf.

struct line pv[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


struct line bestPV[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


int board_eval(int *b)
{
    // Rows.
    if (b[0] && b[0] == b[1] && b[1] == b[2]) return b[0];
    if (b[3] && b[3] == b[4] && b[4] == b[5]) return b[3];
    if (b[6] && b[6] == b[7] && b[7] == b[8]) return b[6];

    // Cols.
    if (b[0] && b[0] == b[3] && b[3] == b[6]) return b[0];
    if (b[1] && b[1] == b[4] && b[4] == b[7]) return b[1];
    if (b[2] && b[2] == b[5] && b[5] == b[8]) return b[2];

    // Center is empty.
    if (!b[4]) return 0;

    // Diags.
    if (b[0] == b[4] && b[4] == b[8]) return b[0];
    if (b[2] == b[4] && b[4] == b[6]) return b[2];

    return 0;
}


void displayboard(int depth)
{
    const char *t = "O X";

    printf("\n\t %c | %c | %c\t\t 0 | 1 | 2\n", t[moves[depth].board[0] + 1], t[moves[depth].board[1] + 1], t[moves[depth].board[2] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 3 | 4 | 5\n", t[moves[depth].board[3] + 1], t[moves[depth].board[4] + 1], t[moves[depth].board[5] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 6 | 7 | 8\n\n", t[moves[depth].board[6] + 1], t[moves[depth].board[7] + 1], t[moves[depth].board[8] + 1]);
}


int find_move(int *board_arr, int nodeMove)
{
    int i;

    // Speedup loop using nodeMove instead of 0.
    for (i = nodeMove; i < 9; i++) {
        if (board_arr[i] == Empty)
            return i;
    }

    return NO_MOVE;
}


int move_up_tree(int depth)
{
    depth--;

    if (depth == 0 && (moves[depth + 1].nodeEval > moves[depth].nodeEval))
    {
        bestMove = moves[depth].nodeMove;
        bestEval = moves[depth + 1].nodeEval;
        pvBestDepth = pvDepth;

        pv[depth] = { bestMove, bestEval };

        for (int i = 0; i < pvDepth; ++i)
        {
            bestPV[i].nodeMove = pv[i].nodeMove;
            bestPV[i].nodeEval = pv[i].nodeEval;
            pv[i] = { NO_MOVE, NO_EVAL };
        }
    }

    if (moves[depth].player == X)
    {
        moves[depth].nodeEval = max(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].alpha = max(moves[depth].alpha, moves[depth].nodeEval);
    }
    else
    {
        moves[depth].nodeEval = min(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].beta = min(moves[depth].beta, moves[depth].nodeEval);
    }

    pv[depth] = { moves[depth].nodeMove, moves[depth].nodeEval };

    moves[depth].nodeMove++;
    moves[depth].nodeMove = find_move(moves[depth].board, moves[depth].nodeMove);

    return depth;
}


int move_down_tree(int depth)
{
    int eval;

    depth++;

    moves[depth] = moves[depth - 1];

    nodesCreated++;

    if (moves[depth].player == X)
    {
        moves[depth].board[moves[depth].nodeMove] = X;
        moves[depth].player = O;
        moves[depth].nodeEval = INFINITY;
    }
    else
    {
        moves[depth].board[moves[depth].nodeMove] = O;
        moves[depth].player = X;
        moves[depth].nodeEval = -INFINITY;
    }

    eval = board_eval(moves[depth].board);

    //  Leaf node.
    if (eval || find_move(moves[depth].board, 0) == NO_MOVE)
    {
        moves[depth].nodeEval = eval;
        moves[depth].nodeMove = NO_MOVE;
        pvDepth = depth;
    }
    else
    {
        moves[depth].nodeMove = find_move(moves[depth].board, 0);
    }

    return depth;
}


void computer_move()
{
    int depth = 0;
    uint64_t c1, c2;

    nodesCreated = 0;
    abCutoffs = 0;
    bestMove = NO_MOVE;
    bestEval = -INFINITY;

    moves[0].nodeMove = find_move(moves[0].board, 0);
    moves[0].nodeEval = -INFINITY;
    moves[0].alpha = -INFINITY;
    moves[0].beta = INFINITY;
    moves[0].player = X;

    if (moves[0].nodeMove != NO_MOVE)
    {
        c1 = __rdtsc();

        while (TRUE)
        {
            if (moves[depth].nodeMove == NO_MOVE)
            {
                if (depth == 0) break;

                depth = move_up_tree(depth);
            }
            else if (moves[depth].alpha >= moves[depth].beta)
            {
                abCutoffs++;
                moves[depth].nodeMove = NO_MOVE;
            }
            else
            {
                depth = move_down_tree(depth);
            }
        }

        c2 = __rdtsc();

        moves[0].board[bestMove] = X;

        printf("\n");
        printf("Thinking Cycles....: %d\n", c2 - c1);
        printf("Boards Generated...: %d\n", nodesCreated);
        printf("Principal Variation: ");

        for (int i = 0; i < pvBestDepth; ++i) printf("%d,%d  ", bestPV[i].nodeMove,bestPV[i].nodeEval);

        printf("\n");
        printf("Alpha-Beta Cutoffs.: %d\n", abCutoffs);
        printf("Computer Evaluation: %d\n", bestEval);
        printf("Computer Move......: %d\n", bestMove);
    }
}


void init_board()
{
    moves[0].board[0] = Empty;
    moves[0].board[1] = Empty;
    moves[0].board[2] = Empty;
    moves[0].board[3] = Empty;
    moves[0].board[4] = Empty;
    moves[0].board[5] = Empty;
    moves[0].board[6] = Empty;
    moves[0].board[7] = Empty;
    moves[0].board[8] = Empty;
}


void human_move()
{
    int move;
    char *p, s[100];

    printf("Your move: ");
    while (fgets(s, sizeof(s), stdin)) {
        move = strtol(s, &p, 10);
        if (p == s || *p != '\n') {
            printf("Your move: ");
        }
        else break;
    }

    moves[0].board[move] = O;
}


int main(int argc, char **argv)
{
    init_board();
    displayboard(0);

    while (1)
    {
        human_move();
        computer_move();
        displayboard(0);

        if (board_eval(moves[0].board))
        {
            printf("Computer Wins! (-_-)\n");
            init_board();
            displayboard(0);
        }
        else if (find_move(moves[0].board, 0) == NO_MOVE)
        {
            printf("A draw! (*_*)\n");
            init_board();
            displayboard(0);
        }
    }

    return 0;
}

0 个答案:

没有答案