我目前正在为棋盘游戏编写AI Hex。我想使用蒙特卡罗树搜索这样做并且已经尝试实现它。但是,AI会做出令人难以置信的愚蠢(随机)动作,我无法弄清楚它为什么不起作用。
import java.util.ArrayList;
import java.util.Random;
/**
* Created by Robin on 18.03.2017.
*/
public class TreeNode {
private static final Random random = new Random();
private static final double epsion=10e-5;
protected double nvisits;
protected double totValue;
protected int move=-1;
private HexBoard board;
protected ArrayList<TreeNode>children ;
public TreeNode(HexBoard board){
this.board =board;
}
//Copy-Constructor
public TreeNode(TreeNode treeNode){
this.nvisits=treeNode.nvisits;
this.totValue=treeNode.totValue;
this.move=treeNode.move;
this.board = new HexBoard(treeNode.board);
}
public void update(double value){
totValue+=value*board.color;
nvisits++;
}
public void expand(){
assert(children==null);
children = new ArrayList<>(121-board.moveCount);
for(int i=0;i<121;i++){
if(board.board[i]!=HexBoard.EMPTY)
continue;
TreeNode newNode = new TreeNode(board);
newNode.move =i;
children.add(newNode);
}
}
public void calculateIteration(){
ArrayList<TreeNode>visited = new ArrayList<>();
TreeNode current =this;
visited.add(current);
while(!current.isLeafNode()){
current =current.select();
board.makeMove(current.move);
visited.add(current);
}
//Found a leaf node
double value;
if(current.board.getWinner()==0){
current.expand();
TreeNode newNode =current.select();
value =playOut(newNode.board);
}else{
value =current.board.getWinner();
}
//update all the nodes
for(int i=1;i<visited.size();i++){
visited.get(i).update(value);
board.undoMove(visited.get(i).move);
}
visited.get(0).update(value);
}
public static int playOut(HexBoard board){
int winner=0;
if(board.moveCount==121) {
winner=board.getWinner();
return winner;
}
//Checking-Movecount vs actual stones on the board
final double left =121-board.moveCount;
double probibility =1/left;
double summe =0;
double p =random.nextDouble();
int randomMove =0;
for(int i=0;i<121;i++){
if(board.board[i]!=HexBoard.EMPTY)
continue;
summe+=probibility;
if(p<=summe && probibility!=0) {
randomMove = i;
break;
}
}
board.makeMove(randomMove);
winner =playOut(board);
board.undoMove(randomMove);
return winner;
}
public TreeNode select(){
TreeNode bestNode=null;
double bestValue =-10000000;
for(TreeNode node : children){
double uctvalue =(node.nvisits==0)?100000:(node.totValue/(node.nvisits)+Math.sqrt((Math.log(this.nvisits))/(2*node.nvisits)));
uctvalue+=epsion*random.nextDouble();
if(uctvalue>bestValue){
bestValue=uctvalue;
bestNode =node;
}
}
return bestNode;
///
}
public boolean isLeafNode(){
return (children==null);
}
}
我在方法calcualteIteration()中的实现是否正确?
我知道这可能不是一个非常有吸引力的问题,但我会感激任何帮助
答案 0 :(得分:2)
OP在问题后的评论中添加了额外信息。这些额外信息的重要部分是实施了makeMove()
方法来检查接下来要播放哪个播放器(以确保对电路板的更新是正确的)。
鉴于该信息,OP中select()
的实施不正确,因为它没有考虑在计算UCT分数时要移动哪个玩家。 UCT评分包括&#34;剥削&#34;部分(第一部分,计算所有先前模拟的平均分数),以及&#34;探索&#34; part(平方根下的部分,对于已经相对于其父级很少访问的节点而增加)。当允许对手下一次移动时,应该否定该等式的利用部分。如果没有这样做,人工智能将基本上假设对手愿意积极帮助AI,而不是假设对手会试图为自己赢得胜利。