I am having a problem loading my model (I think it is the weights).
I have two models: brain (a Brain object) and mind (a Brain object).
They each consist of 3 conv2d layers and 2 fully connected layers, defined in the init_model method of the Brain class. I need to load the weights of the 'brain' model, because brain is the one that predicts Ms. Pac-Man's actions.
I launch the program like this:
I reset the default graph and open a session. I give each model a scope tag; for brain the tag is 'brain'.
from brain import *
from memory import *
from game import *
from frame_processor import FrameProcessor

tf.reset_default_graph()

with tf.Session() as session:

    MEMORY_CAPACITY = 500000
    #------------------------------
    # MSPACMAN
    #------------------------------
    UP = 1
    RIGHT = 2
    LEFT = 3
    DOWN = 4
    MAP_ACTIONS = {0: UP, 1: RIGHT, 2: LEFT, 3: DOWN}
    #------------------------------
    brain = Brain('brain', MAP_ACTIONS)
    mind = Brain('mind', MAP_ACTIONS)
    sensor = FrameProcessor()
    memory = Memory(MEMORY_CAPACITY)
    agent = Agent(brain, mind, sensor, memory)

    pacman = "MsPacman-v0"
    # ---------------------------------------
    # PLAY
    # ---------------------------------------
    atariGame = Game(session,
                     gameId=pacman,
                     algorithm='ddqn',
                     nb_episodes=50001,
                     mini_batch_size=32,
                     discount=0.99,
                     gather_experience=50000,
                     save_each=5000,
                     update_mind_steps=2000,
                     log_level='warning')

    atariGame.play(agent, rendering=True, path="2018_August_31__12_06_28")
When I run in training mode, I save every save_each episodes (here every 5000 episodes):
self.saver.save(session, path, global_step=self.global_ep)
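(For context, the construction of this saver is not shown above; a simplified sketch of a saver limited to the variables under the 'brain' scope would look roughly like the following, the names are illustrative and not my exact code:)

    import tensorflow as tf

    # Illustrative sketch only: collect the variables created under the 'brain'
    # variable scope and build a Saver restricted to them.
    brain_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='brain')
    saver = tf.train.Saver(var_list=brain_vars)

    # saver.save(session, path, global_step=...) then writes path-<step>.data-*,
    # path-<step>.index and path-<step>.meta, and updates the 'checkpoint' file
    # in the same folder.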
This is the play method of the Game object (the init_restoration_tools function creates the folder):
def play(self, agent, rendering, path=None, episode=None):
    logging.info('Initialization global variables ...')
    self.session.run(tf.global_variables_initializer())
    logging.info('Initialization restoration tools ...')
    agent.init_restoration_tools(path)
    agent.restore(self.session, episode)
    self.playing(agent, self.nb_episodes, rendering)
In the __init__ constructor I receive the tag 'brain', then call the init_model() function inside that variable scope:
class Brain(object):

    def __init__(self, tag, MAP_ACTION):
        self.scope = tag
        self.LEARNING_RATE = 0.0001
        self.GAMMA = 0.99
        self.MOMENTUM = 0.0
        self.EPSILON = 1e-10
        self.nb_actions = len(MAP_ACTION)
        self.map = MAP_ACTION
        self.init_actions(self.map)
        self.summary_writer = None
        with tf.variable_scope(self.scope):
            self.init_model()
    def init_model(self):
        # ---------------------------------------
        # INPUTS
        # ---------------------------------------
        self.features = tf.placeholder(dtype=tf.uint8, shape=[None, 80, 80, 4], name='features')
        self.labels = tf.placeholder(dtype=tf.float32, shape=[None], name='labels')
        self.actions = tf.placeholder(dtype=tf.int32, shape=[None], name='actions')
        # ---------------------------------------
        # NORMALIZE INPUT VALUES
        # ---------------------------------------
        features_normalized = tf.to_float(self.features) / 255.0
        # ---------------------------------------------------
        # Feature learning
        # ---------------------------------------------------
        # args: inputs, num_outputs (filters), kernel_size, stride, ..., activation_fn
        # (padding='SAME' adds the required padding automatically)
        conv1 = tf.contrib.layers.conv2d(features_normalized, 32, 8, 4,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv1')
        # conv1 : (batch, 20, 20, 32)
        conv2 = tf.contrib.layers.conv2d(conv1, 64, 4, 2,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv2')
        # conv2 : (batch, 10, 10, 64)
        conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv3')
        # conv3 : (batch, 10, 10, 64)
        # ---------------------------------------------------
        # Classification
        # ---------------------------------------------------
        flatten_1d = tf.contrib.layers.flatten(conv3)
        # args: inputs, num_outputs, activation_fn
        fc4 = tf.contrib.layers.fully_connected(flatten_1d, 512,
                                                activation_fn=tf.nn.relu,
                                                scope='fc4')
        # fc4 : (batch, 512)
        # fc5 = predictions = Q values
        # The default activation_fn is ReLU; set it explicitly to None to keep a linear output.
        self.predictions = tf.contrib.layers.fully_connected(fc4, self.nb_actions,
                                                             activation_fn=None,
                                                             scope='fc5')
        # fc5 : (batch, nb_actions)
        # ---------------------------------------------------
        # LOSS AND Q VALUES
        # ---------------------------------------------------
        # Gather the Q values of the minibatch states for the sampled actions
        nb_batches = tf.shape(self.features)[0]
        mapped_actions = tf.map_fn(lambda x: self.reverse_map.lookup(x), self.actions)
        actions_indices = tf.range(nb_batches) * tf.shape(self.predictions)[1] + mapped_actions
        q_values = tf.gather(tf.reshape(self.predictions, [-1]), actions_indices)
        # selected Q values : [qv0, qv1, qv2, ..., qvn]
        self.losses = tf.squared_difference(self.labels, q_values)
        # mean of all losses
        self.loss = tf.reduce_mean(self.losses)
        # self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE, self.GAMMA, self.MOMENTUM, self.EPSILON)
        self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        # compute_gradients and apply_gradients
        self.train_op = self.optimizer.minimize(self.loss)
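To double-check which variables end up under each scope (and therefore which names the saver stores in the checkpoint), I think a small snippet like this can be run after building both models (just a debugging sketch, not part of my program):

    import tensorflow as tf

    # Print every global variable created under the 'brain' and 'mind' scopes,
    # e.g. brain/conv1/weights:0, brain/fc5/biases:0, ...
    for scope in ('brain', 'mind'):
        for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope):
            print(v.name, v.shape)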
I restore the model this way, but it does not work properly: my Ms. Pac-Man plays badly. I do not understand why, because during training the smoothed reward is very good. That is why I am fairly sure it is a loading problem.
self.saver = tf.train.import_meta_graph(self.latest_checkpoint + '.meta')
self.saver.restore(session, self.latest_checkpoint)
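(For comparison, another way to restore that I have seen is to build a Saver over the variables that already exist in the current graph, instead of importing a second copy of the graph from the .meta file; this is only a sketch and I am not sure it is the right fix:)

    import tensorflow as tf

    # Sketch only: restore the checkpoint values directly into the variables
    # created by Brain('brain', ...) and Brain('mind', ...) in the current graph.
    saver = tf.train.Saver()                        # defaults to all saveable variables
    saver.restore(session, self.latest_checkpoint)  # same self.latest_checkpoint as above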
What should I do to restore the model correctly?