pybrain的pomdp迷宫无法运行

时间:2018-06-05 14:08:00

标签: maze reinforcement-learning pybrain markov-decision-process

我需要使用pybrain库将以下mdp迷宫改为pomdp迷宫。 以下代码实现了mdp迷宫(pybrain教程):

rl.py

from scipy import * #@unusedwildimport
import matplotlib.pyplot as plt
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA #@unusedimport
from pybrain.rl.experiments import Experiment

# Grayscale colormap and interactive mode, so the value plot can be redrawn
# while learning is still running.
plt.gray()
plt.ion()

# 9x9 maze layout (presumably 1 = wall, 0 = free field -- confirm against Maze).
structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 1, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])

# The second argument is the goal position inside the maze.
environment = Maze(structure, (7, 7))

# One table row per maze cell (9 * 9 = 81 states), 4 actions per state.
controller = ActionValueTable(81, 4)
learner = Q()
agent = LearningAgent(controller, learner)
task = MDPMazeTask(environment)
experiment = Experiment(task, agent)

# Learn forever: interact, update the table, then redraw the best action value
# of every cell. Every statement below belongs to the loop body.
while True:
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()

    # Max over the 4 actions gives one value per state; reshape to the 9x9 grid.
    plt.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
    plt.show()
    plt.pause(0.1)

mdp.py

from pybrain.rl.environments import Task
from scipy import array

class MDPMazeTask(Task):
    """MDP task for the maze environment.

    The state is fully observable: the agent is given the current position of
    perseus, encoded as a single integer. A reward of 1 is given when the goal
    is reached, otherwise the reward is 0.
    """

    def getReward(self):
        """Compute and return the reward for the last action performed.

        Returns 1. when perseus stands on the goal, in which case the
        environment is also reset as a side effect; returns 0. otherwise.
        """
        if self.env.goal == self.env.perseus:
            # Goal reached: reset the environment before handing out the reward.
            self.env.reset()
            reward = 1.
        else:
            reward = 0.
        return reward

    def performAction(self, action):
        """Forward the action to the super class.

        The action vector is stripped: only its first element is used, cast
        to an integer.
        """
        Task.performAction(self, int(action[0]))

    def getObservation(self):
        """Return the agent's position as a one-element array.

        The (row, column) position is flattened in row-major order, i.e.
        ``row * number_of_columns + column``, so that every cell maps to a
        distinct index even when the maze is not square.
        """
        # Row-major flattening needs the number of COLUMNS (shape[1]); using
        # the number of rows makes different cells collide in non-square mazes.
        n_columns = self.env.mazeTable.shape[1]
        obs = array([self.env.perseus[0] * n_columns + self.env.perseus[1]])
        return obs

maze.py

from scipy import array

from .pomdp import POMDPTask
from pybrain.rl.environments.mazes import Maze
from pybrain.rl.environments.task import Task


class MazeTask(POMDPTask):
    """POMDP task wrapped around a maze environment.

    Subclasses describe a concrete maze by overriding the class-level
    attributes below (layout, goal, start positions, rewards, noise levels).
    """

    # Maze description handed to the environment constructor.
    mazeclass = Maze
    topology = None
    goal = None
    initPos = None

    # Rewards: on the goal, when the environment reports a bang, and otherwise.
    finalReward = 1
    bangPenalty = 0
    defaultPenalty = 0

    # Noise levels passed through to the environment.
    stochObs = 0
    stochAction = 0

    @property
    def noisy(self):
        """True when the observation noise level is greater than zero."""
        return self.stochObs > 0

    def __init__(self, **args):
        """Build the maze environment from the configured attributes.

        ``args`` is handed to ``setArgs`` first (presumably to override the
        class-level defaults -- confirm against its definition).
        """
        self.setArgs(**args)
        maze = self.mazeclass(self.topology, self.goal, initPos=self.initPos,
                              stochObs=self.stochObs, stochAction=self.stochAction)
        Task.__init__(self, maze)
        # Smallest of the two non-goal rewards.
        self.minReward = min(self.bangPenalty, self.defaultPenalty)
        self.reset()

    def getReward(self):
        """Return the reward that matches the current environment state."""
        env = self.env
        if env.perseus == env.goal:
            return self.finalReward
        if env.bang:
            return self.bangPenalty
        return self.defaultPenalty

    def isFinished(self):
        """Finished once the goal is reached, or when the parent task says so."""
        at_goal = self.env.perseus == self.env.goal
        return at_goal or POMDPTask.isFinished(self)

    def __str__(self):
        """The string form of the task is that of its environment."""
        return str(self.env)


class TrivialMaze(MazeTask):
    """A single three-field corridor.

    #####
    #. *#
    #####
    """
    discount = 0.8

    # 3x5 grid: the only non-1 cells are the three middle fields of row 1.
    topology = array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1]])

    # Start on the left end of the corridor, goal on the right end.
    initPos = [(1, 1)]
    goal = (1, 3)

我尝试将 task = MDPMazeTask(environment) 替换为 task = TrivialMaze(),以便将其改为pomdp迷宫,但是运行时遇到了以下错误。

error:
    Traceback (most recent call last):
  File "/home/web/pybrain-master/docs/tutorials/rl.py", line 140, in <module>
    experiment.doInteractions(100)# speed of learning
  File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 18, in doInteractions
    self._oneInteraction()
  File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 27, in _oneInteraction
    self.task.performAction(self.agent.getAction())
  File "/home/web/pybrain-master/pybrain/rl/agents/learning.py", line 50, in getAction
    self.lastaction = self.module.activate(self.lastobs)
  File "/home/web/pybrain-master/pybrain/structure/modules/module.py", line 121, in activate
    assert len(self.inputbuffer[self.offset]) == len(inpt),    str((len(self.inputbuffer[self.offset]), len(inpt)))
AssertionError: (1, 4)

他们的网站 http://pybrain.org/ 上没有任何地方对此作出解释。有人可以帮助我吗?

0 个答案:

没有答案