I am using TensorBoard to visualize my neural network (NN). The structure of the network is shown in the figure below.
I have four inputs, split into two groups: 1) x-peg-input and x-qog-input go through peg_qog_module, and 2) x-ap-input and x-d-input go through ap_d_module. The outputs of these two modules are merged in combination_module, whose output is then used to compute the loss and gradients and to train the model.
My question is: why do x-ap-input and x-d-input have no dataflow connection to complete_train (my train_op), while x-peg-input and x-qog-input do? See the figure below, which shows the graph after complete_train was added. As far as I understand, every input should be connected to the train_op, because the optimizer needs the input values in order to back-propagate the loss to each trainable weight and update it. So far the model runs, and it does update the weights of ap_d_module (e.g. those under ap1, ap2, d1, d2). Yet complete_train (the train_op) shows no incoming edges from x-ap-input and x-d-input, as the figure below shows.
tensorboard graph with train_op added
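Independently of how TensorBoard draws the edges, the gradient connectivity can be checked directly on the graph. Below is a minimal sketch (not part of the model); it relies only on complete_loss_op and the variable scopes ap1, ap2, d1, d2 that are defined in the code further down, and reports True for every ap_d_module variable that actually receives a gradient:

# Connectivity check (sketch): if the optimizer can back-propagate into
# ap_d_module, every gradient below must be non-None.
ap_d_vars = [v for v in tf.trainable_variables()
             if v.name.startswith(('ap1/', 'ap2/', 'd1/', 'd2/'))]
grads = tf.gradients(complete_loss_op, ap_d_vars)
print([(v.name, g is not None) for v, g in zip(ap_d_vars, grads)])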
Here is all the relevant code. Note that the model has been heavily simplified for debugging purposes, so some of the operations defined below may look odd. They are intentional.
# modules definition
def ap_d_module(x_ap, x_d, name='ap_d_module'):
    # two independent two-layer branches (ap and d); their outputs are
    # concatenated and summed into a single column per example
    with tf.name_scope(name):
        with tf.variable_scope("ap1", reuse=tf.AUTO_REUSE):
            ap_w1 = tf.get_variable('weights', [12, n_ap_hidden_1], initializer=tf.truncated_normal_initializer(stddev=0.1))
            ap_b1 = tf.get_variable('biases', [n_ap_hidden_1], initializer=tf.constant_initializer(0.1))
            # flatten the [batch, timestep, features] input into rows of 12 values
            ap_h1 = tf.tanh(tf.matmul(tf.reshape(x_ap, [-1, 12]), ap_w1) + ap_b1)
        with tf.variable_scope("ap2", reuse=tf.AUTO_REUSE):
            ap_w2 = tf.get_variable('weights', [n_ap_hidden_1, n_ap_hidden_2], initializer=tf.truncated_normal_initializer(stddev=0.1))
            ap_b2 = tf.get_variable('biases', [n_ap_hidden_2], initializer=tf.constant_initializer(0.1))
            ap_h2 = tf.matmul(ap_h1, ap_w2) + ap_b2
        with tf.variable_scope("d1", reuse=tf.AUTO_REUSE):
            d_w1 = tf.get_variable('weights', [12, n_d_hidden_1], initializer=tf.truncated_normal_initializer(stddev=0.1))
            d_b1 = tf.get_variable('biases', [n_d_hidden_1], initializer=tf.constant_initializer(0.1))
            d_h1 = tf.tanh(tf.matmul(tf.reshape(x_d, [-1, 12]), d_w1) + d_b1)
        with tf.variable_scope("d2", reuse=tf.AUTO_REUSE):
            d_w2 = tf.get_variable('weights', [n_d_hidden_1, n_d_hidden_2], initializer=tf.truncated_normal_initializer(stddev=0.1))
            d_b2 = tf.get_variable('biases', [n_d_hidden_2], initializer=tf.constant_initializer(0.1))
            d_h2 = tf.matmul(d_h1, d_w2) + d_b2
        return tf.reshape(tf.reduce_sum(tf.concat([ap_h2, d_h2], axis=1), 1), [-1, 1])
def peg_qog_module(x_peg, x_qog, name='peg_qog_module'):
    with tf.name_scope(name):
        with tf.variable_scope("peg1", reuse=tf.AUTO_REUSE):
            peg_w1 = tf.get_variable('weights', [peg_num_input, n_peg_hidden_1], initializer=tf.truncated_normal_initializer(stddev=0.1))
            peg_b1 = tf.get_variable('biases', [n_peg_hidden_1], initializer=tf.constant_initializer(0.1))
            peg_h1 = tf.tanh(tf.matmul(x_peg, peg_w1) + peg_b1)
        with tf.variable_scope("peg2", reuse=tf.AUTO_REUSE):
            peg_w2 = tf.get_variable('weights', [n_peg_hidden_1, n_peg_hidden_2], initializer=tf.truncated_normal_initializer(stddev=0.1))
            peg_b2 = tf.get_variable('biases', [n_peg_hidden_2], initializer=tf.constant_initializer(0.1))
            peg_h2 = tf.matmul(peg_h1, peg_w2) + peg_b2
        with tf.variable_scope("qog1", reuse=tf.AUTO_REUSE):
            qog_w1 = tf.get_variable('weights', [qog_num_input, n_qog_hidden_1], initializer=tf.truncated_normal_initializer(stddev=0.1))
            qog_b1 = tf.get_variable('biases', [n_qog_hidden_1], initializer=tf.constant_initializer(0.1))
            qog_h1 = tf.tanh(tf.matmul(x_qog, qog_w1) + qog_b1)
        with tf.variable_scope("qog2", reuse=tf.AUTO_REUSE):
            qog_w2 = tf.get_variable('weights', [n_qog_hidden_1, n_qog_hidden_2], initializer=tf.truncated_normal_initializer(stddev=0.1))
            qog_b2 = tf.get_variable('biases', [n_qog_hidden_2], initializer=tf.constant_initializer(0.1))
            qog_h2 = tf.matmul(qog_h1, qog_w2) + qog_b2
        peg_qog = tf.concat([peg_h2, qog_h2], axis=1)
        return tf.reshape(tf.reduce_sum(peg_qog, 1), [-1, 1])
def combination_module(x_momentum, x_growth, name='combination_module'):
    with tf.name_scope(name):
        momentum_growth = tf.concat([x_momentum, x_growth], axis=1)
        with tf.variable_scope("combination1", reuse=tf.AUTO_REUSE):
            w1 = tf.get_variable('W1', [n_momentum_growth, n_combined_hidden_1], initializer=tf.truncated_normal_initializer(stddev=0.1))
            b1 = tf.get_variable('B1', [n_combined_hidden_1], initializer=tf.constant_initializer(0.1))
            combined_h1 = tf.tanh(tf.matmul(momentum_growth, w1) + b1, name='h1_activation')
        with tf.variable_scope("combination2", reuse=tf.AUTO_REUSE):
            w2 = tf.get_variable('W2', [n_combined_hidden_1, n_combined_hidden_2], initializer=tf.truncated_normal_initializer(stddev=0.1))
            b2 = tf.get_variable('B2', [n_combined_hidden_2], initializer=tf.constant_initializer(0.1))
            combined_h2 = tf.tanh(tf.matmul(combined_h1, w2) + b2, name='h2_activation')
        tf.summary.histogram('comb_h1', w1)
        tf.summary.histogram('comb_b1', b1)
        tf.summary.histogram('comb_h2', w2)
        tf.summary.histogram('comb_b2', b2)
        return combined_h2
# graph connections
tf.reset_default_graph()
X_peg_placeholder = tf.placeholder("float", [None, peg_num_input], name='x-peg-input')
X_qog_placeholder = tf.placeholder("float", [None, qog_num_input], name='x-qog-input')
X_ap_placeholder = tf.placeholder("float", [None, timestep, ap_num_input], name='x-ap-input')
X_d_placeholder = tf.placeholder("float", [None, timestep, d_num_input], name='x-d-input')
y_placeholder = tf.placeholder("float", [None, 1], name='y-label')
# Construct model
x_momentum = peg_qog_module(X_peg_placeholder, X_qog_placeholder)
x_growth = ap_d_module(X_ap_placeholder, X_d_placeholder)
complete_prediction = combination_module(x_momentum, x_growth)
with tf.name_scope('mse_complete_loss'):
    complete_loss_op = tf.losses.mean_squared_error(predictions=complete_prediction, labels=y_placeholder)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
with tf.name_scope('complete_train'):
    complete_train_op = optimizer.minimize(complete_loss_op)
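For reference, the graph reaches TensorBoard through a FileWriter. A minimal sketch of that step follows (the log directory './logs' is a placeholder, not from the original code):

# Export the graph (including complete_train) so TensorBoard can render it.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('./logs', sess.graph)
    writer.close()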