我将我的代码Q表制作为书名“学习通过Python和Keras进行学习的强化学习”,我想用Simpy查看结果。因此,我修复了它,但是它不起作用。
我在代码中找不到错误,因此我尝试更改变量。我的代码:
import numpy as np
import random
width = 4
height = 4
class Env:
def __init__(self):
self.CanDoAction = [0,1,2,3] # up down left right
self.field = np.arange(16).reshape(width,height) #Environment
def reset(self):
self.state=self.field[0][0] # agent start position
self.done = False
def move(self, action): # move and boundry Condition
if self.CanDoAction[action] == 0:
if self.state is not self.field[0][0] or self.field[0][1] or self.field[0][2] or self.field[0][3]:
k = -self.field[1][0]
else:
k = self.field[0][0]
elif self.CanDoAction[action] == 1:
if self.state is not self.field[3][0] or self.field[3][1] or self.field[3][2] or self.field[3][3]:
k = self.field[1][0]
else:
k = self.field[0][0]
elif self.CanDoAction[action] == 2:
if self.state is not self.field[0][0] or self.field[1][0] or self.field[2][0] or self.field[3][0]:
k = - self.field[0][1]
else:
k = self.field[0][0]
else:
if self.state is not self.field[0][3] or self.field[1][3] or self.field[2][3] or self.field[3][3]:
k = self.field[0][1]
else:
k = self.field[0][0]
return k
def step(self,action):
next_state = self.state + self.move(action)
if next_state == self.field[0][1]:
reward = -10
done = True
elif next_state == self.field[1][3]:
reward = -10
done = True
elif next_state == self.field[3][3]:
reward = 100
done = True
else:
reward = 0
done = False
return next_state, reward, done
class QlearningAgent:
def __init__(self, actions):
self.actions = actions
self.learning_rate = 0.01
self.discount_factor = 0.9
self.eplision = 0.1
self.q_tabel =[[0 for col in range(4)] for row in range(16)]
def learn(self, state, action, reward, next_state):
b = next_state.tolist()
a = state.tolist()
q_value= self.q_tabel[a][action]
q_new = reward + self.discount_factor * max(self.q_tabel[b])
print(q_new)
self.q_tabel[a][action] += self.learning_rate*(q_new - q_value)
def get_action(self,state):
if np.random.rand() < self.eplision:
action = np.random.choice(self.actions)
else:
a = state.tolist()
state_action = self.q_tabel[a]
action = self.arg_max(state_action)
return action
@staticmethod ##
def arg_max(state_action):
max_index_list = []
max_value = state_action[0]
for index, value in enumerate(state_action):
if value > max_value:
max_index_list.clear()
max_value = value
max_index_list.append(index)
elif value == max_value:
max_index_list.append(index)
return random.choice(max_index_list)
if __name__ == "__main__":
env = Env()
agent = QlearningAgent(actions=list(range(len(env.CanDoAction))))
for episodes in range(10):
env.reset()
state = env.state
while True:
action = agent.get_action(state)
next_state, reward, done = env.step(action)
agent.learn(state,action, reward, next_state)
state = next_state
print(agent.q_tabel)
if done:
break