我正在制作用于学习机器学习但无法正常工作的Q表

时间:2019-06-25 13:05:14

标签: python machine-learning

我将我的代码Q表制作为书名“学习通过Python和Keras进行学习的强化学习”,我想用Simpy查看结果。因此,我修复了它,但是它不起作用。

我在代码中找不到错误,因此我尝试更改变量。我的代码:

import numpy as np
import random
width = 4
height = 4
class Env:
    def __init__(self):
        self.CanDoAction = [0,1,2,3]   # up down left right
        self.field = np.arange(16).reshape(width,height)     #Environment

    def reset(self):
        self.state=self.field[0][0]    # agent start position
        self.done = False

    def move(self, action):  # move and boundry Condition
        if self.CanDoAction[action] == 0:
            if self.state is not self.field[0][0] or self.field[0][1] or self.field[0][2] or self.field[0][3]:
                k = -self.field[1][0]
            else:
                k = self.field[0][0]

        elif self.CanDoAction[action] == 1:
            if self.state  is not self.field[3][0] or self.field[3][1] or self.field[3][2] or self.field[3][3]:
                k = self.field[1][0]
            else:
                k = self.field[0][0]

        elif self.CanDoAction[action] == 2:
            if self.state is not self.field[0][0] or self.field[1][0] or self.field[2][0] or self.field[3][0]:
                k = - self.field[0][1]
            else:
                k = self.field[0][0]

        else:
            if self.state  is not self.field[0][3] or self.field[1][3] or self.field[2][3] or self.field[3][3]:
                k = self.field[0][1]
            else:
                k = self.field[0][0]
        return k

    def step(self,action):
        next_state = self.state + self.move(action)
        if next_state == self.field[0][1]:
            reward = -10
            done = True
        elif next_state == self.field[1][3]:
            reward =  -10
            done = True
        elif next_state == self.field[3][3]:
            reward = 100
            done = True
        else:
            reward = 0
            done = False
        return next_state, reward, done

class QlearningAgent:
    def __init__(self, actions):
        self.actions = actions
        self.learning_rate = 0.01
        self.discount_factor = 0.9
        self.eplision = 0.1
        self.q_tabel  =[[0 for col in range(4)] for row in range(16)]

    def learn(self, state, action, reward, next_state):
        b = next_state.tolist()
        a = state.tolist()
        q_value= self.q_tabel[a][action]

        q_new = reward + self.discount_factor * max(self.q_tabel[b])
        print(q_new)
        self.q_tabel[a][action] += self.learning_rate*(q_new - q_value)

    def get_action(self,state):
        if np.random.rand() < self.eplision:
            action = np.random.choice(self.actions)
        else:
            a = state.tolist()
            state_action = self.q_tabel[a]
            action = self.arg_max(state_action)
        return action

    @staticmethod  ##
    def arg_max(state_action):
        max_index_list = []
        max_value = state_action[0]
        for index, value in enumerate(state_action):
            if value > max_value:
                max_index_list.clear()
                max_value = value
                max_index_list.append(index)
            elif value == max_value:
                max_index_list.append(index)
        return random.choice(max_index_list)

if __name__ ==  "__main__":
    env = Env()
    agent = QlearningAgent(actions=list(range(len(env.CanDoAction))))
    for episodes in range(10):
        env.reset()
        state = env.state
        while True:
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state,action, reward, next_state)

            state = next_state
            print(agent.q_tabel)

            if done:
                break

0 个答案:

没有答案