我正在尝试创建吃豆人游戏。我想训练吃豆人。我尝试使用Q学习。
我的代码:
class Agent(object):
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy.

    Q-values live in a ``defaultdict`` keyed by ``(state, action)``, so any
    pair that has never been updated reads as 0.0. States and actions must
    therefore be hashable.

    Args:
        epsilon: Probability of picking a uniformly random action in
            ``choose`` (exploration rate). It is constant; no decay is
            applied here.
        gamma: Discount factor applied to the best next-state value.
        alpha: Learning rate used by ``update``. ``alpha=1.0`` overwrites the
            old estimate with the newest sample; smaller values average over
            samples, which is needed when transitions or rewards are random.
    """

    def __init__(self, epsilon=0.4, gamma=0.99, alpha=0.1):
        self.q = collections.defaultdict(float)
        self.epsilon = epsilon  # Exploration rate
        self.gamma = gamma  # Discount factor
        self.alpha = alpha  # Learning rate

    def choose(self, s, actions):
        """Return an action for state ``s`` using epsilon-greedy selection.

        With probability ``epsilon`` a random element of ``actions`` is
        returned, otherwise the greedy action from ``policy``.
        """
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(actions)
        return self.policy(s, actions)

    def policy(self, s, actions):
        """Return a greedy action for ``s``, breaking ties at random.

        Raises:
            ValueError: If ``actions`` is empty (there is nothing to choose).
        """
        # Evaluate each Q-value once and reuse it for the tie-break filter.
        values = [self.Q(s, a) for a in actions]
        best = max(values)
        best_actions = [a for a, v in zip(actions, values) if v == best]
        return random.choice(best_actions)

    def maxQvalue(self, s, actions):
        """Return the largest Q-value available in state ``s``.

        A state with no available actions is treated as terminal and has
        value 0.0, so it contributes nothing to the bootstrapped target.
        """
        if not actions:
            return 0.0
        return max([self.Q(s, a) for a in actions])

    def Q(self, s, a):
        """Return the current estimate for taking action ``a`` in state ``s``."""
        return self.q[s, a]

    def update(self, s, a, newS, r, actions):
        """Apply one Q-learning step for the transition ``(s, a, r, newS)``.

        Args:
            s: State in which the action was taken.
            a: Action that was taken.
            newS: State reached after the action.
            r: Reward received for the transition.
            actions: Actions available in ``newS``; pass an empty sequence
                when ``newS`` is terminal.
        """
        target = r + self.gamma * self.maxQvalue(newS, actions)
        # Move the estimate towards the target instead of replacing it, so
        # one unlucky (or lucky) sample does not erase what was learned.
        self.q[s, a] += self.alpha * (target - self.q[s, a])
def main(episodes=1):
    """Train a Q-learning agent by playing the game ``episodes`` times.

    A single agent is created up front and reused for every game, so the
    Q-table keeps accumulating experience across episodes. One game is far
    too little for tabular Q-learning; pass a larger ``episodes`` to train.

    Args:
        episodes: Number of games to play. The default of 1 plays exactly
            one game, as before.
    """
    environment = Environment(20, 10)
    agent = Agent()
    for _ in range(episodes):
        # NOTE(review): assumes initialize() resets the board (ball, ghost,
        # food) to a fresh game each time it is called - confirm.
        environment.initialize()
        environment.display()
        while not environment.terminal():
            state = environment.state()
            action = agent.choose(state, environment.actions())
            environment.update(action)
            # Everything below is observed after the move has been applied.
            next_state = environment.state()
            reward = environment.reward()
            next_actions = environment.actions()
            agent.update(state, action, next_state, reward, next_actions)
            environment.display()
环境类:
# Game world the agent interacts with. This is an outline only: the method
# bodies are omitted and each comment describes the intended behaviour.
# main() also calls Environment(20, 10), initialize() and display(), which
# are not shown here.
class Environment(object):
def actions(self):
# Return the actions that are currently possible.
def terminal(self):
# Return True if the ball hits the ghost, otherwise False.
# NOTE(review): the accompanying description says the game also ends once
# all food is eaten - confirm that case is covered here.
def reward(self):
# Reward for the most recent move:
# -200 if the ball hits the ghost
# -100 if the ball hits a wall
# 10 if the ball reaches food
# 0 otherwise
def update(self, action):
# Move the ball according to `action`.
# The ghost then moves in a random direction.
def state(self):
# Return the ball position.
# NOTE(review): the state holds only the ball position; the ghost position
# and remaining food are not part of it, so a tabular agent presumably
# cannot tell a safe cell from one next to the ghost - consider adding them.
游戏在吃完所有食物或者被幽灵抓住时结束。在上述设置中,我只有一个幽灵。我期望的是,游戏只应在吃完所有食物之后才结束,但我的代码做不到这一点:在吃完所有食物之前,球(吃豆人)就撞上了幽灵。我不知道自己哪里做错了。
我还有另一个问题:如果球由用户控制,而我想训练幽灵去抓住球,应该怎么做?