How do I visualize the weights of each layer in TensorBoard?

Asked: 2019-07-02 07:10:47

Tags: tensorflow keras neural-network deep-learning tensorboard

I am writing code for a deep Q-learning model. It contains two classes, the DQN network and the agent. I want to visualize the weights and biases of every layer and observe whether they change as the network learns. The model contains a network that predicts the Q-values of the current state (the q_eval network) and a network that predicts the target Q-values (the q_next network), and I would like to visualize the weights of each of them. If, while helping me, you could also point out a possible error in the network architecture, that would be great.

I have already tried tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES()), but it does not work as expected; maybe I am doing something wrong.
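For reference, here is a minimal sketch of how trainable variables are usually fetched and logged as histograms in TF 1.x; the scope name 'q_eval' and the log directory below are only illustrative assumptions, not taken from the question:

import tensorflow as tf

# tf.GraphKeys.TRAINABLE_VARIABLES is a string key, not a callable, so it is
# passed without parentheses; the optional scope argument restricts the lookup
# to one network's variable scope.
params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_eval')

# One histogram summary per variable, collected in a list so none is lost.
histograms = [tf.summary.histogram(v.name[:-2], v) for v in params]
merged_histograms = tf.summary.merge(histograms)

writer = tf.summary.FileWriter("tmp/log_dir")  # illustrative log directory

Evaluating merged_histograms in a session and passing the result to writer.add_summary() should produce one panel per variable under the HISTOGRAMS tab.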

import os
import time

import numpy as np
import tensorflow as tf

class DeepQNetwork(object):

def __init__(self, lr, n_actions, name, fc1_dims=1024,
             #input_dims=(210, 160, 4),
             input_dims=(3, 4), chkpt_dir="tmp/dqn"):
    self.lr = lr
    self.name = name
    self.n_actions = n_actions
    self.fc1_dims = fc1_dims
    self.chkpt_dir = chkpt_dir
    self.input_dims = input_dims
    self.sess = tf.Session()
    self.build_network()
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    self.checkpoint_file = os.path.join(chkpt_dir, "deepqnet.ckpt")
    self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope=self.name)
    self.write_op = tf.summary.merge([self.accuracy_sum, self.loss_sum, self.summ])
    self.writer = tf.summary.FileWriter("tmp/log_dir")
    self.writer.add_graph(self.sess.graph)


    # The list of values in the collection with the given name
    # or an empty list if no value has been added to that collection.
    # Trainable variables are those whose values are updated during optimisation.

def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32, shape=[None, *self.input_dims],
                                    name='inputs')
        # * unpacks input_dims, so the placeholder shape becomes [None, *self.input_dims].
        self.actions = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                     name='action_taken')
        self.q_target = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                       name='q_target')

        # 1st dimension inside shape is set to None because we want to pass
        # batches of stacked frames into the neural network.

        conv1 = tf.layers.conv2d(inputs=self.input, filters=32,
                                 kernel_size=(8, 8), strides=4, name='conv1',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv1_activated = tf.nn.relu(conv1)


        conv2 = tf.layers.conv2d(inputs=conv1_activated, filters=64,
                                 kernel_size=(4, 4), strides=2, name='conv2',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv2_activated = tf.nn.relu(conv2)


        conv3 = tf.layers.conv2d(inputs=conv2_activated, filters=128,
                                 kernel_size=(3, 3), strides=1, name='conv3',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv3_activated = tf.nn.relu(conv3)

        flat = tf.contrib.layers.flatten(conv3_activated)

        dense1 = tf.layers.dense(flat, units=self.fc1_dims, activation=tf.nn.relu,
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))

        self.Q_values = tf.layers.dense(dense1, units=self.n_actions,
                                        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))


        self.q = tf.reduce_sum(tf.multiply(self.Q_values, self.actions))
        self.accuracy_sum = tf.summary.scalar('Accuracy', self.q)

        self.loss = tf.reduce_mean(tf.square(self.q - self.q_target))
        self.loss_sum = tf.summary.scalar("Loss", self.loss)

        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

        for var in tf.trainable_variables():
            print(var.name[:-2])
            self.summ = tf.summary.histogram(var.name[:-2], var)

class Agent(object):

def __init__(self, alpha, gamma, mem_size, n_actions, epsilon, batch_size,
             replace_target=10000, input_dims=(210, 160, 4),
             q_next_dir="tmp/q_next", q_eval_dir="tmp/q_eval"):
    self.n_actions = n_actions
    self.action_space = [i for i in range(self.n_actions)]
    # For n_actions = 3, action_space is the list [0, 1, 2].
    self.gamma = gamma
    self.mem_size = mem_size
    self.mem_cntr = 0
    self.epsilon = epsilon
    self.batch_size = batch_size
    self.replace_target = replace_target

    self.q_next = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                               name='q_next', chkpt_dir=q_next_dir)
    self.q_eval = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                               name='q_eval', chkpt_dir=q_eval_dir)

def learn(self):
    if self.mem_cntr % self.replace_target == 0:
        self.update_graph()
    # we update the graph after every K steps, so that the q_target is not fluctuating.

    max_mem = self.mem_cntr if self.mem_cntr < self.mem_size else self.mem_size

    batch = np.random.choice(max_mem, self.batch_size)
    # batch has length batch_size; its elements are indices drawn from np.arange(max_mem).

    state_batch = self.state_memory[batch]
    # Shape of the state batch is equal to (batch_size, input_dims)
    # ex: (32, 210, 160, 4)

    action_batch = self.action_memory[batch]
    action_values = np.array([0, 1, 2], dtype=np.int8)
    action_indices = np.dot(action_batch, action_values)
    reward_batch = self.reward_memory[batch]
    new_state_batch = self.new_state_memory[batch]
    terminal_batch = self.terminal_memory[batch]

    q_eval = self.q_eval.sess.run(self.q_eval.Q_values,
                                  feed_dict={self.q_eval.input: state_batch})
    # It has shape (batch_size, n_actions).
    # This gives Q values for each action, in this case 3 actions, using q_eval network for current state batch.

    q_next = self.q_next.sess.run(self.q_next.Q_values,
                                  feed_dict={self.q_next.input: new_state_batch})
    # This gives Q values for the next state using the q_next network.

    q_target = q_eval.copy()
    idx = np.arange(self.batch_size)
    q_target[idx, action_indices] = reward_batch + \
        self.gamma*np.max(q_next, axis=1)*terminal_batch
    # axis=1 takes the maximum Q-value over the actions (the columns) for each sample in the batch.

    #q_target = np.zeros(self.batch_size)
    #q_target = reward_batch + self.gamma*np.max(q_next, axis =1)*terminal_batch

    _ = self.q_eval.sess.run(self.q_eval.train_op,
                             feed_dict={self.q_eval.input: state_batch,
                                        self.q_eval.actions: action_batch,
                                        self.q_eval.q_target: q_target})

    loss = self.q_eval.sess.run(self.q_eval.loss,
                                feed_dict={self.q_eval.input: state_batch,
                                           self.q_eval.actions: action_batch,
                                           self.q_eval.q_target: q_target})



    summary = self.q_eval.sess.run(self.q_eval.write_op,
                                   feed_dict={self.q_eval.input: state_batch,
                                              self.q_eval.actions: action_batch,
                                              self.q_eval.q_target: q_target,
                                              self.q_next.input: new_state_batch})

    self.q_eval.writer.add_summary(summary, time.time())
    self.q_eval.writer.flush()

When I run this code, I can only see the bias of a single layer, namely the bias of the dense layer of the q_eval network.
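A likely cause of this symptom: in the for loop at the end of build_network(), self.summ is reassigned on every iteration, so tf.summary.merge([self.accuracy_sum, self.loss_sum, self.summ]) only ever contains the histogram of the last trainable variable, i.e. the bias of the final dense layer. Note also that tf.trainable_variables() without a scope returns the variables of both networks once q_eval and q_next have both been built. Below is a minimal sketch of one way to keep a histogram per variable, assuming the same class structure as in the question:

# Inside build_network(), after the layers are defined (sketch, not the
# original code): collect one histogram summary per variable of this
# network only, instead of overwriting self.summ on each iteration.
self.hist_summaries = []
for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name):
    self.hist_summaries.append(tf.summary.histogram(var.name[:-2], var))

# Then, in __init__, merge all of them into the write op:
self.write_op = tf.summary.merge([self.accuracy_sum, self.loss_sum]
                                 + self.hist_summaries)

Starting TensorBoard with tensorboard --logdir tmp/log_dir should then show one histogram per kernel and bias, prefixed by the network's variable scope.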
