Pac-Man with Q-learning

Time: 2020-04-26 11:11:05

Tags: reinforcement-learning q-learning

I'm trying to build a Pac-Man game and train the Pac-Man agent using Q-learning.

My code:

import collections
import random

class Agent(object):
    def __init__(self):
        self.q = collections.defaultdict(float)  # Q-table, keyed by (state, action)
        self.epsilon = 0.4  # exploration rate
        self.gamma = 0.99   # discount factor
        self.alpha = 0.1    # learning rate

    def choose(self, s, actions):
        # Epsilon-greedy action selection.
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(actions)
        return self.policy(s, actions)

    def policy(self, s, actions):
        # Greedy action; break ties randomly.
        max_value = max(self.Q(s, a) for a in actions)
        max_actions = [a for a in actions if self.Q(s, a) == max_value]
        return random.choice(max_actions)

    def maxQvalue(self, s, actions):
        return max(self.Q(s, a) for a in actions)

    def Q(self, s, a):
        return self.q[s, a]

    def update(self, s, a, newS, r, actions):
        # Standard Q-learning update: move Q(s, a) toward the TD target
        # r + gamma * max_a' Q(s', a') at rate alpha, rather than
        # overwriting the old value outright.
        target = r + self.gamma * self.maxQvalue(newS, actions)
        self.q[s, a] += self.alpha * (target - self.q[s, a])

def main():
    environment = Environment(20, 10)
    agent = Agent()
    environment.initialize()
    environment.display()
    while not environment.terminal():
        s = environment.state()
        actions = environment.actions()
        a = agent.choose(s, actions)
        environment.update(a)
        sp = environment.state()
        r = environment.reward()
        actions = environment.actions()  # actions available in the new state
        agent.update(s, a, sp, r, actions)
        environment.display()

The Environment class:

class Environment(object):

    def actions(self):
        # Return the list of possible actions from the current state.
        ...

    def terminal(self):
        # Return True if the ball hit the ghost, else False.
        ...

    def reward(self):
        # -200 if the ball hits the ghost
        # -100 if it hits a wall
        #   10 if it eats food
        #    0 otherwise
        ...

    def update(self, action):
        # Move the ball according to `action`; the ghost moves randomly.
        ...

    def state(self):
        # Return the ball's position.
        ...
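To make the stubs above concrete, here is one minimal way the Environment could be filled in. This is an illustrative assumption, not your actual game: the grid layout, food placement, and `_move` helper are all invented for the sketch; only the reward values and method names come from the post.

```python
import random

class Environment(object):
    # Minimal concrete version of the stubs above: a width x height grid
    # with the ball, one ghost, and food on every other cell.
    def __init__(self, width, height):
        self.width, self.height = width, height

    def initialize(self):
        self.ball = (0, 0)
        self.ghost = (self.width - 1, self.height - 1)
        self.food = {(x, y) for x in range(self.width)
                     for y in range(self.height) if (x + y) % 2 == 0}
        self.food.discard(self.ball)
        self.last_reward = 0

    def actions(self):
        return ['up', 'down', 'left', 'right']

    def terminal(self):
        return self.ball == self.ghost or not self.food

    def reward(self):
        return self.last_reward

    def _move(self, pos, action):
        # Hypothetical helper: apply a move, clamped to the grid.
        dx, dy = {'up': (0, -1), 'down': (0, 1),
                  'left': (-1, 0), 'right': (1, 0)}[action]
        x, y = pos[0] + dx, pos[1] + dy
        if 0 <= x < self.width and 0 <= y < self.height:
            return (x, y), False
        return pos, True  # hit a wall

    def update(self, action):
        self.ball, hit_wall = self._move(self.ball, action)
        self.ghost, _ = self._move(self.ghost, random.choice(self.actions()))
        if self.ball == self.ghost:
            self.last_reward = -200
        elif hit_wall:
            self.last_reward = -100
        elif self.ball in self.food:
            self.food.discard(self.ball)
            self.last_reward = 10
        else:
            self.last_reward = 0

    def state(self):
        return self.ball

    def display(self):
        pass  # rendering omitted in this sketch
```

Storing the reward of the last transition in `last_reward` is one way to make a separate `reward()` call, as used in `main()`, return the right value.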

The game ends when either all the food has been eaten or the ball is caught by the ghost. In the setup above there is only one ghost. My expectation was that, after training, the game would usually end with all the food eaten, but that is not what happens: the ball runs into the ghost before finishing the food. I don't know what I'm doing wrong.
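One likely cause: `main()` plays a single episode, but tabular Q-learning only becomes useful after many episodes, ideally with the exploration rate decaying over time and a learning rate blending new targets into old estimates. The following self-contained sketch (a toy 1-D corridor with food at the right end, not your actual Environment; the decay schedule and constants are assumptions) shows the multi-episode structure:

```python
import collections
import random

def train(n_episodes=500, length=5, gamma=0.99, alpha=0.1):
    # Toy corridor: states 0..length-1, food at the right end.
    q = collections.defaultdict(float)
    actions = [-1, +1]  # move left / move right
    for episode in range(n_episodes):
        # Decaying exploration: explore a lot early, exploit later.
        epsilon = max(0.05, 1.0 - episode / (n_episodes / 2))
        s = 0
        while s != length - 1:
            if random.random() < epsilon:
                a = random.choice(actions)
            else:
                a = max(actions, key=lambda a: q[s, a])
            sp = min(max(s + a, 0), length - 1)
            r = 10 if sp == length - 1 else -1
            # Incremental Q-learning update rather than overwriting Q(s, a).
            target = r + gamma * max(q[sp, b] for b in actions)
            q[s, a] += alpha * (target - q[s, a])
            s = sp
    return q

random.seed(0)
q = train()
# After training, the greedy policy should walk straight to the food.
s, steps = 0, 0
while s != 4 and steps < 20:
    s += max([-1, 1], key=lambda a: q[s, a])
    steps += 1
print(steps)
```

The same structure applied to your code would mean wrapping the `while not environment.terminal()` loop in an outer episode loop that re-initializes the environment but keeps the agent's Q-table.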

I also have a second question: what if I want to train the ghost instead, so that it learns to catch the ball while the ball is controlled by the user?
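For the second question, one common approach (an assumption, not the only option) is to give the ghost its own Q-table over the joint state (ghost position, ball position), with a mirrored reward: positive when it catches the ball, a small penalty per step. A toy 1-D pursuit sketch, with the ball moving randomly as a stand-in for user input:

```python
import collections
import random

def train_ghost(n_episodes=2000, length=5, gamma=0.9, alpha=0.2):
    # The ghost is the learner; its state is the pair (ghost_pos, ball_pos).
    q = collections.defaultdict(float)
    actions = [-1, +1]
    for episode in range(n_episodes):
        epsilon = max(0.05, 1.0 - episode / (n_episodes / 2))
        ghost, ball = 0, length - 1
        for _ in range(50):  # step limit per episode
            s = (ghost, ball)
            if random.random() < epsilon:
                a = random.choice(actions)
            else:
                a = max(actions, key=lambda a: q[s, a])
            ghost = min(max(ghost + a, 0), length - 1)
            # Random ball movement stands in for the user's input.
            ball = min(max(ball + random.choice(actions), 0), length - 1)
            caught = ghost == ball
            r = 10 if caught else -1  # mirrored reward: the ghost wants the collision
            sp = (ghost, ball)
            target = r if caught else r + gamma * max(q[sp, b] for b in actions)
            q[s, a] += alpha * (target - q[s, a])
            if caught:
                break
    return q

random.seed(0)
q = train_ghost()
# With the ball to its right, the trained ghost should prefer moving right.
print(q[(0, 4), 1] > q[(0, 4), -1])
```

Against a real user the ball's moves come from input events instead of `random.choice`, but the update loop stays the same; note that a learning opponent makes the environment non-stationary from each agent's point of view, so training both sides at once converges less reliably than training one at a time.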

0 answers:

No answers yet.