我正在测试tf.variable_scope对象以在同一session.run调用中多次重用网络:
# Dummy inputs: a (1, 1, 4) float64 sequence for the GRU branch and a
# (1, 5) vector for the dense branch (np.zeros defaults to float64,
# matching data1's explicit dtype).
data1 = tf.constant([[[3.,5.,6.,1.]]],dtype=tf.float64)
data2 = tf.constant(np.zeros((1,5)))
def networkS(input_1, input_2, reuse):
    """Build a two-branch value network; `reuse` is forwarded to tf.variable_scope.

    NOTE(review): wrapping tf.keras layers in a variable_scope does NOT make
    repeated calls share weights — each call constructs brand-new layer
    objects (and therefore new variables), which is exactly why v1/v2/v3
    below differ despite identical inputs.
    """
    # this is a multi-input network using tf.keras api
    with tf.variable_scope("test", reuse=reuse):
        # input_1: three stacked GRUs; only the last one drops the time axis
        x = tf.keras.layers.CuDNNGRU(512, return_sequences=True)(input_1)
        x = tf.keras.layers.CuDNNGRU(512, return_sequences=True)(x)
        x = tf.keras.layers.CuDNNGRU(512)(x)
        # input_2: a single dense embedding
        y = tf.keras.layers.Dense(32, activation="relu")(input_2)
        # merge the two branches along the feature axis
        x = tf.keras.layers.concatenate([x, y], axis=-1)
        x = tf.keras.layers.Dense(512, activation='relu')(x)
        x = tf.keras.layers.Dense(256, activation='relu')(x)
        x = tf.keras.layers.Dense(128, activation='relu')(x)
        vf_pre = tf.keras.layers.Dense(128, activation='relu')(x)
        vf = tf.keras.layers.Dense(1)(vf_pre)
    return vf
# Build the graph three times; tf.AUTO_REUSE is intended to share variables,
# but (as the differing outputs below show) the keras layers inside
# networkS() create fresh variables on every call regardless.
v1 = networkS(data1, data2, tf.AUTO_REUSE)
v2 = networkS(data1, data2, tf.AUTO_REUSE)
v3 = networkS(data1, data2, tf.AUTO_REUSE)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([v1,v2,v3]))
我的理解是,在图构建阶段:第一次调用 networkS() 时,我们在 variable_scope "test" 下创建一个新的网络,之后对 networkS() 的后续调用仅重用已有的 layer 变量。但是,在输入相同的情况下,v1、v2、v3 的结果却各不相同:
[array([[0.00112361]]), array([[0.00107469]]), array([[0.00115032]])]
我认为这意味着这三个网络是并行(parallel)构建的,并没有共享同一组变量,因此相同的输入产生了不同的结果。
如果我两次调用sess.run,它在两次调用之间会产生相同的结果
# Fetching the same tensors twice in one session: each v_i is deterministic
# across run() calls (its variables are fixed after initialization), yet the
# three networks still disagree with each other — three variable sets exist.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([v1,v2,v3]))
    print(sess.run([v1,v2,v3]))
[array([[0.00550815]]), array([[-0.00294633]]), array([[0.00584344]])]
[array([[0.00550815]]), array([[-0.00294633]]), array([[0.00584344]])]
我该如何解决这个问题?
答案 0(得分:0)
找到了解决方案:根据 Keras 的 GitHub 上的一个讨论帖,这是有意为之的设计——每次实例化一个 keras.layers 类,都会创建一组新的变量。要重用变量,需要先创建一个 layer 或 model 对象,之后重复调用该同一对象即可重用其变量。
新代码:
class GruModel(tf.keras.Model):
    """Two-branch value network built as a Keras Model subclass.

    Every layer object is created exactly once in __init__, so repeated
    calls to call() reuse the same variables — unlike instantiating
    tf.keras layers anew on each invocation.
    """

    def __init__(self):
        super(GruModel, self).__init__()
        # create all the keras.layers objects in __init__()
        # Branch 1: stacked GRUs; only the last drops the time dimension.
        self.GRU_1 = tf.keras.layers.CuDNNGRU(512, return_sequences=True)
        self.GRU_2 = tf.keras.layers.CuDNNGRU(512, return_sequences=True)
        self.GRU_end = tf.keras.layers.CuDNNGRU(512)
        # Branch 2: dense embedding of the second input.
        self.Dense_second_input = tf.keras.layers.Dense(32, activation="relu")
        # Feature-axis merge of the two branches.
        self.Dense_merge = tf.keras.layers.Concatenate(axis=-1)
        self.Dense_1 = tf.keras.layers.Dense(512, activation='relu')
        self.Dense_2 = tf.keras.layers.Dense(256, activation='relu')
        self.Dense_3 = tf.keras.layers.Dense(128, activation='relu')
        self.Dense_vf_pre = tf.keras.layers.Dense(128, activation='relu')
        self.Dense_vf = tf.keras.layers.Dense(1)

    def call(self, input_1, input_2):
        """Run both branches, merge them, and return the scalar value head.

        Args:
            input_1: sequence input for the GRU stack.
            input_2: vector input for the dense branch.

        Returns:
            The (batch, 1) value-function output.
        """
        # input_1
        x = self.GRU_1(input_1)
        x = self.GRU_2(x)
        x = self.GRU_end(x)
        # input_2
        y = self.Dense_second_input(input_2)
        # merge two inputs; call the layer directly — Layer.apply() is a
        # deprecated alias for __call__.
        x = self.Dense_merge([x, y])
        x = self.Dense_1(x)
        x = self.Dense_2(x)
        x = self.Dense_3(x)
        vf_pre = self.Dense_vf_pre(x)
        vf = self.Dense_vf(vf_pre)
        return vf
然后:
# Same dummy inputs as before: (1, 1, 4) float64 sequence + (1, 5) vector.
data1 = tf.constant([[[3.,5.,6.,1.]]],dtype=tf.float64)
data2 = tf.constant(np.zeros((1,5)))
model = GruModel()
# One model instance -> one set of variables shared by all three graphs.
# NOTE(review): prefer model(data1, data2) over model.call(...) — __call__
# runs Keras bookkeeping (building, input handling) before dispatching to
# call(); confirm against the Keras Model docs.
v1 = model.call(data1, data2)
v2 = model.call(data1, data2)
v3 = model.call(data1, data2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([v1,v2,v3]))
    print(sess.run([v1,v2,v3]))
结果:
[array([[0.01640865]]), array([[0.01640865]]), array([[0.01640865]])]
[array([[0.01640865]]), array([[0.01640865]]), array([[0.01640865]])]