我需要使用pybrain库将以下mdp迷宫改为pomdp迷宫。 以下代码实现了mdp迷宫(pybrain教程):
rl.py
# NOTE(review): replaced `from scipy import *` -- modern SciPy no longer
# re-exports NumPy names such as `array`; import it explicitly from numpy.
from numpy import array
import matplotlib.pyplot as plt

from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA  #@UnusedImport
from pybrain.rl.experiments import Experiment

# Interactive greyscale rendering of the learned value function.
plt.gray()
plt.ion()

# 9x9 grid: 1 = wall, 0 = free cell.
structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 1, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])

# Maze environment with the goal placed at cell (7, 7).
environment = Maze(structure, (7, 7))

# One Q-table row per cell (9 * 9 = 81 states), four actions.
# NOTE(review): the original PyBrain tutorial also calls
# controller.initialize(1.) here; this paste omits it -- confirm intended.
controller = ActionValueTable(81, 4)
learner = Q()
agent = LearningAgent(controller, learner)

# Fully observable task: the agent observes its exact maze position.
task = MDPMazeTask(environment)
experiment = Experiment(task, agent)

while True:
    # Gather 100 interactions, learn from them, then clear episode history.
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()

    # Plot the greedy value of each cell (max over the four actions).
    plt.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
    plt.show()
    plt.pause(0.1)
mdp.py
from pybrain.rl.environments import Task
from scipy import array
class MDPMazeTask(Task):
    """MDP task for the MazeEnvironment.

    The state is fully observable: the agent receives the current
    position of perseus. A reward of 1 is given on reaching the goal,
    otherwise the reward is 0.
    """

    def getReward(self):
        """Return the reward corresponding to the last action performed.

        Reaching the goal yields 1.0 and resets the environment;
        every other step yields 0.0.
        """
        if self.env.goal == self.env.perseus:
            self.env.reset()
            return 1.
        return 0.

    def performAction(self, action):
        """Strip the action vector to its single element, cast it to
        int and forward it to the superclass.
        """
        Task.performAction(self, int(action[0]))

    def getObservation(self):
        """Return the agent's position as a single flat state index
        (row * table-width + column), making the problem a fully
        observable MDP.
        """
        row, col = self.env.perseus[0], self.env.perseus[1]
        return array([row * self.env.mazeTable.shape[0] + col])
maze.py
from scipy import array
from .pomdp import POMDPTask
from pybrain.rl.environments.mazes import Maze
from pybrain.rl.environments.task import Task
class MazeTask(POMDPTask):
    """A task wrapping a maze environment."""

    # Rewards: penalty for bumping a wall, penalty for an ordinary
    # step, and the reward for reaching the goal.
    bangPenalty = 0
    defaultPenalty = 0
    finalReward = 1

    # Maze configuration, filled in by subclasses or via **args.
    topology = None
    goal = None
    initPos = None
    mazeclass = Maze

    # Probabilities of stochastic observations / actions.
    stochObs = 0
    stochAction = 0

    @property
    def noisy(self):
        # The task counts as noisy whenever observations are stochastic.
        return self.stochObs > 0

    def __init__(self, **args):
        self.setArgs(**args)
        env = self.mazeclass(self.topology, self.goal,
                             initPos=self.initPos,
                             stochObs=self.stochObs,
                             stochAction=self.stochAction)
        Task.__init__(self, env)
        # Worst-case single-step reward, used elsewhere as a lower bound.
        self.minReward = min(self.bangPenalty, self.defaultPenalty)
        self.reset()

    def getReward(self):
        """Reward for the last step: goal > wall-bump > ordinary move."""
        if self.env.perseus == self.env.goal:
            return self.finalReward
        if self.env.bang:
            return self.bangPenalty
        return self.defaultPenalty

    def isFinished(self):
        # An episode ends on reaching the goal or on the base-class
        # termination condition (e.g. step limit).
        if self.env.perseus == self.env.goal:
            return True
        return POMDPTask.isFinished(self)

    def __str__(self):
        return str(self.env)
class TrivialMaze(MazeTask):
    """Minimal corridor maze; the agent starts at '.' and must reach '*':
    #####
    #. *#
    #####
    """
    discount = 0.8
    initPos = [(1, 1)]
    goal = (1, 3)
    # 3x5 layout: a single free row enclosed by walls.
    topology = array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1]])
我尝试将task = MDPMazeTask(environment)
替换为task = TrivialMaze()
但是遇到了一些错误。
error:
Traceback (most recent call last):
File "/home/web/pybrain-master/docs/tutorials/rl.py", line 140, in <module>
experiment.doInteractions(100)# speed of learning
File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 18, in doInteractions
self._oneInteraction()
File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 27, in _oneInteraction
self.task.performAction(self.agent.getAction())
File "/home/web/pybrain-master/pybrain/rl/agents/learning.py", line 50, in getAction
self.lastaction = self.module.activate(self.lastobs)
File "/home/web/pybrain-master/pybrain/structure/modules/module.py", line 121, in activate
assert len(self.inputbuffer[self.offset]) == len(inpt), str((len(self.inputbuffer[self.offset]), len(inpt)))
AssertionError: (1, 4)
他们的网站 http://pybrain.org/ 上没有任何地方解释过这个问题。有人可以帮助我吗?