I am trying to implement Q-learning in a simple game I have written. The game is based around the player having to "jump" to avoid oncoming boxes.
I have designed the system with two actions, jump and do_nothing, and the state is the distance to the next block (divided and floored to ensure that there is not a huge number of states).
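For instance, with the resolution of 0.1 used below, whole ranges of raw pixel distances collapse into a single state (a quick illustration of the floor-and-scale step in getBlockDistance):

Math.floor(57 * 0.1);  // 5  — raw distances 50..59 all map to state 5
Math.floor(112 * 0.1); // 11 — raw distances 110..119 all map to state 11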
My problem seems to be that my implementation of the algorithm is not considering "future reward", and so it ends up jumping at the wrong times.
Here is my implementation of the Q-learning algorithm:
JumpGameAIClass.prototype.getQ = function getQ(state) {
if (!this.Q.hasOwnProperty(state)) {
this.Q[state] = {};
for (var actionIndex = 0; actionIndex < this.actions.length; actionIndex++) {
var action = this.actions[actionIndex];
this.Q[state][action] = 0;
}
}
return this.Q[state];
};
JumpGameAIClass.prototype.getBlockDistance = function getBlockDistance() {
var closest = -1;
for (var blockIndex = 0; blockIndex < this.blocks.length; blockIndex++) {
var block = this.blocks[blockIndex];
var distance = block.x - this.playerX;
if (distance >= 0 && (closest === -1 || distance < closest)) {
closest = distance;
}
}
return Math.max(0, Math.floor(closest * this.resolution));
};
JumpGameAIClass.prototype.getActionWithHighestQ = function getActionWithHighestQ(distance) {
var jumpReward = this.getQ(distance)[this.actions[0]];
var doNothingReward = this.getQ(distance)[this.actions[1]];
if (jumpReward > doNothingReward) {
return this.actions[0];
} else if (doNothingReward > jumpReward) {
return this.actions[1];
} else {
if (!this.canJump()) {
return this.actions[1];
}
return this.actions[Math.floor(Math.random() * this.actions.length)];
}
};
JumpGameAIClass.prototype.getActionEpsilonGreedy = function getActionEpsilonGreedy() {
// We can't jump while in mid-air
if (!this.canJump()) {
return this.actions[1];
}
if (Math.random() < this.epsilon) {
return this.actions[Math.floor(Math.random() * this.actions.length)];
} else {
return this.getActionWithHighestQ(this.getBlockDistance());
}
};
JumpGameAIClass.prototype.think = function think() {
var reward = this.liveReward;
if (this.score !== this.lastScore) {
this.lastScore = this.score;
reward = this.scoreReward;
} else if (!this.playerAlive) {
reward = this.deathReward;
}
this.drawDistance();
var distance = this.getBlockDistance(),
maxQ = this.getQ(distance)[this.getActionWithHighestQ(distance)],
previousQ = this.getQ(this.lastDistance)[this.lastAction];
    // Standard Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    this.getQ(this.lastDistance)[this.lastAction] = previousQ + this.alpha * (reward + (this.gamma * maxQ) - previousQ);
this.lastAction = this.getActionEpsilonGreedy();
this.lastDistance = distance;
switch (this.lastAction) {
case this.actions[0]:
this.jump();
break;
}
};
Here are some of the properties it uses:
epsilon: 0.05,
alpha: 1,
gamma: 1,
resolution: 0.1,
actions: [ 'jump', 'do_nothing' ],
Q: {},
liveReward: 0,
scoreReward: 100,
deathReward: -1000,
lastAction: 'do_nothing',
lastDistance: 0,
lastScore: 0
I have had to use lastAction / lastDistance to calculate Q, as I cannot use the current data (that would be acting on the action performed in the previous frame).

The think method is called once every frame, after all of the rendering and game things are done (physics, controls, death, etc.).
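Schematically, each call to think() closes out the previous frame's transition before opening a new one (names taken from the code above):

// 1. reward       <- outcome of (lastDistance, lastAction) from the previous frame
// 2. distance     <- the current state s'
// 3. Q[lastDistance][lastAction] += alpha * (reward + gamma * max_a Q[distance][a]
//                                            - Q[lastDistance][lastAction])
// 4. lastAction   <- epsilon-greedy action for distance (executed this frame)
// 5. lastDistance <- distance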
var JumpGameAIClass = function JumpGame(canvas) {
Game.JumpGame.call(this, canvas);
Object.defineProperties(this, {
epsilon: {
value: 0.05
},
alpha: {
value: 1
},
gamma: {
value: 1
},
resolution: {
value: 0.1
},
actions: {
value: [ 'jump', 'do_nothing' ]
},
Q: {
value: { },
writable: true
},
liveReward: {
value: 0
},
scoreReward: {
value: 100
},
deathReward: {
value: -1000
},
lastAction: {
value: 'do_nothing',
writable: true
},
lastDistance: {
value: 0,
writable: true
},
lastScore: {
value: 0,
writable: true
}
});
};
JumpGameAIClass.prototype = Object.create(Game.JumpGame.prototype);
JumpGameAIClass.prototype.getQ = function getQ(state) {
if (!this.Q.hasOwnProperty(state)) {
this.Q[state] = {};
for (var actionIndex = 0; actionIndex < this.actions.length; actionIndex++) {
var action = this.actions[actionIndex];
this.Q[state][action] = 0;
}
}
return this.Q[state];
};
JumpGameAIClass.prototype.getBlockDistance = function getBlockDistance() {
var closest = -1;
for (var blockIndex = 0; blockIndex < this.blocks.length; blockIndex++) {
var block = this.blocks[blockIndex];
var distance = block.x - this.playerX;
if (distance >= 0 && (closest === -1 || distance < closest)) {
closest = distance;
}
}
return Math.max(0, Math.floor(closest * this.resolution));
};
JumpGameAIClass.prototype.getActionWithHighestQ = function getActionWithHighestQ(distance) {
var jumpReward = this.getQ(distance)[this.actions[0]];
var doNothingReward = this.getQ(distance)[this.actions[1]];
if (jumpReward > doNothingReward) {
return this.actions[0];
} else if (doNothingReward > jumpReward) {
return this.actions[1];
} else {
if (!this.canJump()) {
return this.actions[1];
}
return this.actions[Math.floor(Math.random() * this.actions.length)];
}
};
JumpGameAIClass.prototype.getActionEpsilonGreedy = function getActionEpsilonGreedy() {
if (!this.canJump()) {
return this.actions[1];
}
if (Math.random() < this.epsilon) {
return this.actions[Math.floor(Math.random() * this.actions.length)];
} else {
return this.getActionWithHighestQ(this.getBlockDistance());
}
};
JumpGameAIClass.prototype.onDeath = function onDeath() {
this.restart();
};
JumpGameAIClass.prototype.think = function think() {
var reward = this.liveReward;
if (this.score !== this.lastScore) {
this.lastScore = this.score;
reward = this.scoreReward;
} else if (!this.playerAlive) {
reward = this.deathReward;
}
this.drawDistance();
var distance = this.getBlockDistance(),
maxQ = this.getQ(distance)[this.getActionWithHighestQ(distance)],
previousQ = this.getQ(this.lastDistance)[this.lastAction];
this.getQ(this.lastDistance)[this.lastAction] = previousQ + this.alpha * (reward + (this.gamma * maxQ) - previousQ);
this.lastAction = this.getActionEpsilonGreedy();
this.lastDistance = distance;
switch (this.lastAction) {
case this.actions[0]:
this.jump();
break;
}
};
JumpGameAIClass.prototype.drawDistance = function drawDistance() {
this.context.save();
this.context.textAlign = 'center';
this.context.textBaseline = 'bottom';
this.context.fillText('Distance: ' + this.getBlockDistance(), this.canvasWidth / 2, this.canvasHeight / 4);
this.context.textBaseline = 'top';
this.context.fillText('Last Distance: ' + this.lastDistance, this.canvasWidth / 2, this.canvasHeight / 4);
this.context.restore();
};
JumpGameAIClass.prototype.onFrame = function onFrame() {
Game.JumpGame.prototype.onFrame.apply(this, arguments);
this.think();
}
Game.JumpGameAI = JumpGameAIClass;
body {
background-color: #EEEEEE;
text-align: center;
}
canvas#game {
background-color: #FFFFFF;
border: 1px solid #DDDDDD;
}
<!DOCTYPE HTML>
<html lang="en">
<head>
<title>jump</title>
</head>
<body>
<canvas id="game" width="512" height="512">
<h1>Your browser doesn't support canvas!</h1>
</canvas>
<script src="https://raw.githubusercontent.com/cagosta/requestAnimationFrame/master/app/requestAnimationFrame.js"></script>
<!-- https://gist.github.com/jackwilsdon/d06bffa6b32c53321478 -->
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/4e467f82590e76543bf55ff788504e26afc3d694/game.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2b7ce2c3dd268c4aef9ad27316edb0b235ad0d06/canvasgame.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2696c72e001e48359a6ce880f1c475613fe359f5/jump.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/249c92f3385757b6edf2ceb49e26f14b89ffdcfe/bootstrap.js"></script>
</body>
Answer 0 (score: 5)
You basically have a simplified version of the standard Q-learning algorithm.
I used the values:
epsilon: {
value: 0.01
},
alpha: {
value: 0.7
},
gamma: {
value: 0.9
},
resolution: {
value: 0.1
},
liveReward: {
value: 10
},
scoreReward: {
value: -100
},
deathReward: {
value: 1000
},
With these it had no trouble getting scores of over 100 within the first 20 tries.
Q-learning can be described with temporal logic:

Q(s, a) = r(s, a) + gamma * max_a'(Q(s', a'))

where

r(s, a) = r = the immediate reward
gamma = the relative value of delayed vs. immediate rewards (0 to 1)
s' = the new state after action a
a = the action taken in state s
a' = an action in state s'

You should execute it as: choose an action and execute it.
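As a minimal sketch of that loop in JavaScript (the env object with getState() and step() is a hypothetical stand-in for the game, not part of the code above):

// Tabular Q-learning over a hypothetical episodic environment.
// env.getState() returns a discrete state key; env.step(action)
// returns { reward, done } — both are assumptions for illustration.
var actions = ['jump', 'do_nothing'];
var alpha = 0.7, gamma = 0.9, epsilon = 0.01;
var Q = {};

function getQ(state) {
    if (!Q.hasOwnProperty(state)) {
        Q[state] = { jump: 0, do_nothing: 0 };
    }
    return Q[state];
}

function bestAction(state) {
    var q = getQ(state);
    return q.jump > q.do_nothing ? 'jump' : 'do_nothing';
}

function trainEpisode(env) {
    var state = env.getState();
    var done = false;
    while (!done) {
        // Choose an action (epsilon-greedy) and execute it.
        var action = Math.random() < epsilon
            ? actions[Math.floor(Math.random() * actions.length)]
            : bestAction(state);
        var result = env.step(action);
        var nextState = env.getState();
        // Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a));
        // a terminal state contributes no future value.
        var maxQ = result.done ? 0 : getQ(nextState)[bestAction(nextState)];
        var q = getQ(state);
        q[action] += alpha * (result.reward + gamma * maxQ - q[action]);
        state = nextState;
        done = result.done;
    }
}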
Answer 1 (score: 2)
Your implementation of the algorithm is fine; it just needs some of the parameters tweaking.

If you assign some reward for living, 10 in my example, and set epsilon to 0, you will get a winning AI.

Example:
var JumpGameAIClass = function JumpGame(canvas) {
Game.JumpGame.call(this, canvas);
Object.defineProperties(this, {
epsilon: {
value: 0
},
alpha: {
value: 1
},
gamma: {
value: 1
},
resolution: {
value: 0.1
},
actions: {
value: [ 'jump', 'do_nothing' ]
},
Q: {
value: { },
writable: true
},
liveReward: {
value: 0
},
scoreReward: {
value: 100
},
deathReward: {
value: -1000
},
lastAction: {
value: 'do_nothing',
writable: true
},
lastDistance: {
value: 0,
writable: true
},
lastScore: {
value: 0,
writable: true
}
});
};
JumpGameAIClass.prototype = Object.create(Game.JumpGame.prototype);
JumpGameAIClass.prototype.getQ = function getQ(state) {
if (!this.Q.hasOwnProperty(state)) {
this.Q[state] = {};
for (var actionIndex = 0; actionIndex < this.actions.length; actionIndex++) {
var action = this.actions[actionIndex];
this.Q[state][action] = 0;
}
}
return this.Q[state];
};
JumpGameAIClass.prototype.getBlockDistance = function getBlockDistance() {
var closest = -1;
for (var blockIndex = 0; blockIndex < this.blocks.length; blockIndex++) {
var block = this.blocks[blockIndex];
var distance = block.x - this.playerX;
if (distance >= 0 && (closest === -1 || distance < closest)) {
closest = distance;
}
}
return Math.max(0, Math.floor(closest * this.resolution));
};
JumpGameAIClass.prototype.getActionWithHighestQ = function getActionWithHighestQ(distance) {
var jumpReward = this.getQ(distance)[this.actions[0]];
var doNothingReward = this.getQ(distance)[this.actions[1]];
if (!this.canJump()) {
return this.actions[1];
} else if (jumpReward > doNothingReward) {
return this.actions[0];
} else if (doNothingReward > jumpReward) {
return this.actions[1];
} else {
return this.actions[Math.floor(Math.random() * this.actions.length)];
}
};
JumpGameAIClass.prototype.getActionEpsilonGreedy = function getActionEpsilonGreedy() {
if (!this.canJump()) {
return this.actions[1];
}
if (Math.random() < this.epsilon) {
return this.actions[Math.floor(Math.random() * this.actions.length)];
} else {
return this.getActionWithHighestQ(this.getBlockDistance());
}
};
JumpGameAIClass.prototype.onDeath = function onDeath() {
this.restart();
};
JumpGameAIClass.prototype.think = function think() {
var reward = this.liveReward;
if (this.score !== this.lastScore) {
this.lastScore = this.score;
reward = this.scoreReward;
} else if (!this.playerAlive) {
reward = this.deathReward;
}
this.drawDistance();
var distance = this.getBlockDistance(),
maxQ = this.playerAlive ? this.getQ(distance)[this.getActionWithHighestQ(distance)] : 0,
previousQ = this.getQ(this.lastDistance)[this.lastAction];
this.getQ(this.lastDistance)[this.lastAction] = previousQ + this.alpha * (reward + (this.gamma * maxQ) - previousQ);
this.lastAction = this.getActionEpsilonGreedy();
this.lastDistance = distance;
switch (this.lastAction) {
case this.actions[0]:
this.jump();
break;
}
};
JumpGameAIClass.prototype.drawDistance = function drawDistance() {
this.context.save();
this.context.textAlign = 'center';
this.context.textBaseline = 'bottom';
this.context.fillText('Distance: ' + this.getBlockDistance(), this.canvasWidth / 2, this.canvasHeight / 4);
this.context.textBaseline = 'top';
this.context.fillText('Last Distance: ' + this.lastDistance, this.canvasWidth / 2, this.canvasHeight / 4);
this.context.restore();
};
JumpGameAIClass.prototype.onFrame = function onFrame() {
Game.JumpGame.prototype.onFrame.apply(this, arguments);
this.think();
}
Game.JumpGameAI = JumpGameAIClass;
body {
background-color: #EEEEEE;
text-align: center;
}
canvas#game {
background-color: #FFFFFF;
border: 1px solid #DDDDDD;
}
<!DOCTYPE HTML>
<html lang="en">
<head>
<title>jump</title>
</head>
<body>
<canvas id="game" width="512" height="512">
<h1>Your browser doesn't support canvas!</h1>
</canvas>
<script src="https://raw.githubusercontent.com/cagosta/requestAnimationFrame/master/app/requestAnimationFrame.js"></script>
<!-- https://gist.github.com/jackwilsdon/d06bffa6b32c53321478 -->
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/4e467f82590e76543bf55ff788504e26afc3d694/game.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2b7ce2c3dd268c4aef9ad27316edb0b235ad0d06/canvasgame.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2696c72e001e48359a6ce880f1c475613fe359f5/jump.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/249c92f3385757b6edf2ceb49e26f14b89ffdcfe/bootstrap.js"></script>
</body>
Update

Having given this a little more thought, although my example appears to work, it is not correct.

What's nasty is that because the outcome of a jump is not known until many iterations into the future, assigning an immediate reward for living causes whatever random decision was first made in each state to be repeated until the outcome of that decision eventually propagates back through the states.

Given the game's physics, the player's jump length is shorter than the block spacing, which means a jump that clears one block will land farther from the next block than the point where it took off, so the same jump can be made again. As a result, if a "good" jump happens before the first block, the system immediately converges on a successful pattern. If the physics were different, or a "bad" jump were made, this AI might never correct itself.
The problem is that the state really has two components, blockDistance and playerY. Without including playerY in the decision state, the outcome of a jump cannot propagate correctly back to its origin.
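A minimal sketch of such a composite state, assuming the game exposes the player's height as a playerY property (a hypothetical name; the base game may call it something else):

JumpGameAIClass.prototype.getState = function getState() {
    // Combine both state components into one Q-table key so that
    // mid-air frames are distinguished from grounded frames.
    var height = Math.floor(this.playerY * this.resolution); // playerY is an assumption
    return this.getBlockDistance() + ':' + height;
};

Every getQ(distance) call in think() would then become getQ(this.getState()).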
You can get around this in this simple game by biasing decisions toward doing nothing. Since the distance-based state is complete as long as you are not jumping, the outcome of not jumping, i.e. dying, propagates correctly back to the decision not to jump at each distance. It is still a little funky, since once you do jump the propagation of reward will be incorrect, but you can now watch it learn all the same.

Example:
var JumpGameAIClass = function JumpGame(canvas) {
Game.JumpGame.call(this, canvas);
Object.defineProperties(this, {
epsilon: {
value: 0
},
alpha: {
value: 1
},
gamma: {
value: 1
},
resolution: {
value: 0.1
},
actions: {
value: [ 'jump', 'do_nothing' ]
},
Q: {
value: { },
writable: true
},
liveReward: {
value: 10
},
scoreReward: {
value: 100
},
deathReward: {
value: -1000
},
lastAction: {
value: 'do_nothing',
writable: true
},
lastDistance: {
value: 0,
writable: true
},
lastScore: {
value: 0,
writable: true
}
});
};
JumpGameAIClass.prototype = Object.create(Game.JumpGame.prototype);
JumpGameAIClass.prototype.getQ = function getQ(state) {
if (!this.Q.hasOwnProperty(state)) {
this.Q[state] = {};
for (var actionIndex = 0; actionIndex < this.actions.length; actionIndex++) {
var action = this.actions[actionIndex];
this.Q[state][action] = 0;
}
}
return this.Q[state];
};
JumpGameAIClass.prototype.getBlockDistance = function getBlockDistance() {
var closest = -1;
for (var blockIndex = 0; blockIndex < this.blocks.length; blockIndex++) {
var block = this.blocks[blockIndex];
var distance = block.x - this.playerX;
if (distance >= 0 && (closest === -1 || distance < closest)) {
closest = distance;
}
}
return Math.max(0, Math.floor(closest * this.resolution));
};
JumpGameAIClass.prototype.getActionWithHighestQ = function getActionWithHighestQ(distance) {
var jumpReward = this.getQ(distance)[this.actions[0]];
var doNothingReward = this.getQ(distance)[this.actions[1]];
if (!this.canJump() || doNothingReward >= jumpReward) {
return this.actions[1];
} else {
return this.actions[0];
}
};
JumpGameAIClass.prototype.getActionEpsilonGreedy = function getActionEpsilonGreedy() {
if (!this.canJump()) {
return this.actions[1];
}
if (Math.random() < this.epsilon) {
return this.actions[Math.floor(Math.random() * this.actions.length)];
} else {
return this.getActionWithHighestQ(this.getBlockDistance());
}
};
JumpGameAIClass.prototype.onDeath = function onDeath() {
this.restart();
};
JumpGameAIClass.prototype.think = function think() {
var reward = this.liveReward;
if (this.score !== this.lastScore) {
this.lastScore = this.score;
reward = this.scoreReward;
} else if (!this.playerAlive) {
reward = this.deathReward;
}
this.drawDistance();
var distance = this.getBlockDistance(),
maxQ = this.playerAlive ? this.getQ(distance)[this.getActionWithHighestQ(distance)] : 0,
previousQ = this.getQ(this.lastDistance)[this.lastAction];
this.getQ(this.lastDistance)[this.lastAction] = previousQ + this.alpha * (reward + (this.gamma * maxQ) - previousQ);
this.lastAction = this.getActionEpsilonGreedy();
this.lastDistance = distance;
switch (this.lastAction) {
case this.actions[0]:
this.jump();
break;
}
};
JumpGameAIClass.prototype.drawDistance = function drawDistance() {
this.context.save();
this.context.textAlign = 'center';
this.context.textBaseline = 'bottom';
this.context.fillText('Distance: ' + this.getBlockDistance(), this.canvasWidth / 2, this.canvasHeight / 4);
this.context.textBaseline = 'top';
this.context.fillText('Last Distance: ' + this.lastDistance, this.canvasWidth / 2, this.canvasHeight / 4);
this.context.restore();
};
JumpGameAIClass.prototype.onFrame = function onFrame() {
Game.JumpGame.prototype.onFrame.apply(this, arguments);
this.think();
}
Game.JumpGameAI = JumpGameAIClass;
body {
background-color: #EEEEEE;
text-align: center;
}
canvas#game {
background-color: #FFFFFF;
border: 1px solid #DDDDDD;
}
<!DOCTYPE HTML>
<html lang="en">
<head>
<title>jump</title>
</head>
<body>
<canvas id="game" width="512" height="512">
<h1>Your browser doesn't support canvas!</h1>
</canvas>
<script src="https://raw.githubusercontent.com/cagosta/requestAnimationFrame/master/app/requestAnimationFrame.js"></script>
<!-- https://gist.github.com/jackwilsdon/d06bffa6b32c53321478 -->
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/4e467f82590e76543bf55ff788504e26afc3d694/game.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2b7ce2c3dd268c4aef9ad27316edb0b235ad0d06/canvasgame.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2696c72e001e48359a6ce880f1c475613fe359f5/jump.js"></script>
<script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/249c92f3385757b6edf2ceb49e26f14b89ffdcfe/bootstrap.js"></script>
</body>