I am having a problem loading my model (I think it is the weights).
I have two models: brain (a Brain object) and mind (a Brain object).
They each consist of 3 conv2d layers and 2 fully connected layers, defined in the init_model method of the Brain class. I need to load the weights of the 'brain' model, because brain is the one that predicts Ms. Pac-Man's actions.
I launch the program like this:
I reset the default graph and open a session. I give each model a scope tag; for brain the tag is 'brain'.
from brain import *
from memory import *
from game import *
from frame_processor import FrameProcessor

tf.reset_default_graph()

with tf.Session() as session:

    MEMORY_CAPACITY = 500000
    #------------------------------
    # MSPACMAN
    #------------------------------
    UP = 1
    RIGHT = 2
    LEFT = 3
    DOWN = 4
    MAP_ACTIONS = {0: UP, 1: RIGHT, 2: LEFT, 3: DOWN}
    #------------------------------
    brain = Brain('brain', MAP_ACTIONS)
    mind = Brain('mind', MAP_ACTIONS)
    sensor = FrameProcessor()
    memory = Memory(MEMORY_CAPACITY)
    agent = Agent(brain, mind, sensor, memory)

    pacman = "MsPacman-v0"
    # ---------------------------------------
    # PLAY
    # ---------------------------------------
    atariGame = Game(session,
                     gameId=pacman,
                     algorithm='ddqn',
                     nb_episodes=50001,
                     mini_batch_size=32,
                     discount=0.99,
                     gather_experience=50000,
                     save_each=5000,
                     update_mind_steps=2000,
                     log_level='warning')

    atariGame.play(agent, rendering=True, path="2018_August_31__12_06_28")
When I run in training mode, I save every save_each episodes (here every 5000 episodes):
self.saver.save(session, path, global_step=self.global_ep)
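(For context, the construction of this saver is not shown above; a simplified sketch of a saver limited to the variables under the 'brain' scope would look roughly like the following, the names are illustrative and not my exact code:)

    import tensorflow as tf

    # Illustrative sketch only: collect the variables created under the 'brain'
    # variable scope and build a Saver restricted to them.
    brain_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='brain')
    saver = tf.train.Saver(var_list=brain_vars)

    # saver.save(session, path, global_step=...) then writes path-<step>.data-*,
    # path-<step>.index and path-<step>.meta, and updates the 'checkpoint' file
    # in the same folder.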
This is the play method of the Game object (the init_restoration_tools function creates the folder):
def play(self, agent, rendering, path=None, episode=None):
    logging.info('Initialization global variables ...')
    self.session.run(tf.global_variables_initializer())
    logging.info('Initialization restoration tools ...')
    agent.init_restoration_tools(path)
    agent.restore(self.session, episode)
    self.playing(agent, self.nb_episodes, rendering)
In the __init__ constructor I receive the tag 'brain', then call the init_model() function inside that variable scope:
class Brain(object):

    def __init__(self, tag, MAP_ACTION):
        self.scope = tag
        self.LEARNING_RATE = 0.0001
        self.GAMMA = 0.99
        self.MOMENTUM = 0.0
        self.EPSILON = 1e-10
        self.nb_actions = len(MAP_ACTION)
        self.map = MAP_ACTION
        self.init_actions(self.map)
        self.summary_writer = None
        with tf.variable_scope(self.scope):
            self.init_model()
    def init_model(self):
        # ---------------------------------------
        # INPUTS
        # ---------------------------------------
        self.features = tf.placeholder(dtype=tf.uint8, shape=[None, 80, 80, 4], name='features')
        self.labels = tf.placeholder(dtype=tf.float32, shape=[None], name='labels')
        self.actions = tf.placeholder(dtype=tf.int32, shape=[None], name='actions')
        # ---------------------------------------
        # NORMALIZE INPUT VALUES
        # ---------------------------------------
        features_normalized = tf.to_float(self.features) / 255.0
        # ---------------------------------------------------
        # Feature learning
        # ---------------------------------------------------
        # args: inputs, num_outputs (filters), kernel_size, stride, ..., activation_fn
        # (padding='SAME' adds the required padding automatically)
        conv1 = tf.contrib.layers.conv2d(features_normalized, 32, 8, 4,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv1')
        # conv1 : (batch, 20, 20, 32)
        conv2 = tf.contrib.layers.conv2d(conv1, 64, 4, 2,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv2')
        # conv2 : (batch, 10, 10, 64)
        conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1,
                                         padding='SAME',
                                         activation_fn=tf.nn.relu,
                                         scope='conv3')
        # conv3 : (batch, 10, 10, 64)
        # ---------------------------------------------------
        # Classification
        # ---------------------------------------------------
        flatten_1d = tf.contrib.layers.flatten(conv3)
        # args: inputs, num_outputs, activation_fn
        fc4 = tf.contrib.layers.fully_connected(flatten_1d, 512,
                                                activation_fn=tf.nn.relu,
                                                scope='fc4')
        # fc4 : (batch, 512)
        # fc5 = predictions = Q values
        # The default activation_fn is ReLU; set it explicitly to None to keep a linear output.
        self.predictions = tf.contrib.layers.fully_connected(fc4, self.nb_actions,
                                                             activation_fn=None,
                                                             scope='fc5')
        # fc5 : (batch, nb_actions)
        # ---------------------------------------------------
        # LOSS AND Q VALUES
        # ---------------------------------------------------
        # Gather the Q values of the minibatch states for the sampled actions
        nb_batches = tf.shape(self.features)[0]
        mapped_actions = tf.map_fn(lambda x: self.reverse_map.lookup(x), self.actions)
        actions_indices = tf.range(nb_batches) * tf.shape(self.predictions)[1] + mapped_actions
        q_values = tf.gather(tf.reshape(self.predictions, [-1]), actions_indices)
        # selected Q values : [qv0, qv1, qv2, ..., qvn]
        self.losses = tf.squared_difference(self.labels, q_values)
        # mean of all losses
        self.loss = tf.reduce_mean(self.losses)
        # self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE, self.GAMMA, self.MOMENTUM, self.EPSILON)
        self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        # compute_gradients and apply_gradients
        self.train_op = self.optimizer.minimize(self.loss)
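To double-check which variables end up under each scope (and therefore which names the saver stores in the checkpoint), I think a small snippet like this can be run after building both models (just a debugging sketch, not part of my program):

    import tensorflow as tf

    # Print every global variable created under the 'brain' and 'mind' scopes,
    # e.g. brain/conv1/weights:0, brain/fc5/biases:0, ...
    for scope in ('brain', 'mind'):
        for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope):
            print(v.name, v.shape)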
I restore the model this way, but it does not work properly: my Ms. Pac-Man plays badly. I do not understand why, because during training the smoothed reward is very good. That is why I am fairly sure it is a loading problem.
self.saver = tf.train.import_meta_graph(self.latest_checkpoint + '.meta')
self.saver.restore(session, self.latest_checkpoint)
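(For comparison, another way to restore that I have seen is to build a Saver over the variables that already exist in the current graph, instead of importing a second copy of the graph from the .meta file; this is only a sketch and I am not sure it is the right fix:)

    import tensorflow as tf

    # Sketch only: restore the checkpoint values directly into the variables
    # created by Brain('brain', ...) and Brain('mind', ...) in the current graph.
    saver = tf.train.Saver()                        # defaults to all saveable variables
    saver.restore(session, self.latest_checkpoint)  # same self.latest_checkpoint as above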
What should I do to restore the model correctly?