(注意-TensorFlow的新手)
(代码在做什么-可选) 因此,我一直在尝试创建一个基本上可以做到这一点的模型-您有两个随机初始化的网络A,B和一个网络C(都具有相同的体系结构),它们的参数是A和B的混合。在训练C之后,我必须根据更新后的C的权重更新A和B的权重,然后再次将C的参数初始化为A和B的混合,对其进行训练-循环继续进行。
问题 - 我在创建会话之后才使用 tf 操作(ops)来更新参数,导致每次迭代后计算时间不断增加(由于计算图的不断扩展)
参考- 1. Tensorflow: run time increases per iteration 和
我尝试了第二个链接的第一种方法,即:为所有 tf 操作创建占位符,并通过 feed_dict 传递变量。但是由于我需要更新的是张量,在 feed_dict 中传递张量 W1a、W1b 等会报错(因为无法在 feed_dict 中直接传递张量)。我也了解过 eager execution,但由于现有代码的结构,启用它会引发错误。请注意,下面的代码可以工作,并且能完成我想要的事情,但我想知道是否有人能建议任何优化策略。如果有人想看一下,这里是完整代码的链接 - https://bitbucket.org/yashpandey4/so_tensorflow/commits/2e1bfee9c076d9af184a95133ca216b3392c6d27
暂时忽略training_step函数,然后看一下循环
# Set up the networks A, B, C, optimisers, loss functions etc. (defined above,
# outside this excerpt).
# init: create the session and run the variable initialiser exactly once,
# before any training steps execute.
sess=tf.Session()
# global_variables_initializer covers every variable created up to this point.
init = tf.global_variables_initializer()
sess.run(init)
# Train the model on one mini-batch of 100 images; optionally print
# train/test metrics for visualisation before taking the gradient step.
def training_step(i, update_test_data, update_train_data):
    # Fetch the next batch of 100 images with their 100 labels.
    batch_X, batch_Y = mnist.train.next_batch(100)
    batch_feed = {X: batch_X, Y_: batch_Y, step: i}
    # Training metrics for visualisation.
    if update_train_data:
        train_acc, train_loss, learning_rate = sess.run(
            [accuracy, cross_entropy, lr], feed_dict=batch_feed)
        print(str(i) + ": accuracy:" + str(train_acc) + " loss: " + str(train_loss) + " (lr:" + str(learning_rate) + ")")
    # Test metrics for visualisation.
    if update_test_data:
        test_feed = {X: mnist.test.images, Y_: mnist.test.labels}
        test_acc, test_loss = sess.run(
            [accuracy, cross_entropy], feed_dict=test_feed)
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(test_acc) + " test loss: " + str(test_loss))
    # One backpropagation step on this batch.
    sess.run(train_step, batch_feed)
epoch=1
# NOTE(review): every tf op below (assign / add / multiply / ones) is created
# INSIDE the training loop, so the computation graph grows on every iteration
# -- this is the cause of the per-iteration slowdown described in the
# question.  The ops should be built once before the loop and only
# sess.run() executed inside it.
for i in range((epoch*600)+1):
    training_step(i, i % 100 == 0, i % 20 == 0)
    # Update weights of A,B based on new weights of C:
    #   Wa <- trans*Wc + (1-trans)*Wa  (elementwise mix), likewise for Wb.
    ass1=(W1a.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1a,tf.subtract(tf.ones([784, L]),trans_arr1)))))
    ass2=(W1b.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1)))))
    ass3=(W2a.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2a,tf.subtract(tf.ones([L,M]),trans_arr2)))))
    ass4=(W2b.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2)))))
    ass5=(W3a.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3a,tf.subtract(tf.ones([M,N]),trans_arr3)))))
    ass6=(W3b.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3)))))
    ass7=(W4a.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4a,tf.subtract(tf.ones([N,O]),trans_arr4)))))
    ass8=(W4b.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4)))))
    ass9=(W5a.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5a,tf.subtract(tf.ones([O,10]),trans_arr5)))))
    ass0=(W5b.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5)))))
    # Update weights of C based on new weights of A,B.
    # Fresh random mixing masks -- presumably numpy arrays from genRandMat,
    # regenerated each iteration (TODO confirm genRandMat's return type).
    trans_arr1 = genRandMat(784,L,ptrans)
    trans_arr2 = genRandMat(L,M,ptrans)
    trans_arr3 = genRandMat(M,N,ptrans)
    trans_arr4 = genRandMat(N,O,ptrans)
    trans_arr5 = genRandMat(O,10,ptrans)
    # Wc <- trans*Wa + (1-trans)*Wb -- note these rebind the Python names to
    # brand-new graph tensors every iteration (more graph growth).
    W1c = tf.add(tf.multiply(W1a, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1)))
    W2c = tf.add(tf.multiply(W2a, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2)))
    W3c = tf.add(tf.multiply(W3a, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3)))
    W4c = tf.add(tf.multiply(W4a, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4)))
    W5c = tf.add(tf.multiply(W5a, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5)))
    # Execute the assignments and evaluate the new C weights in one call.
    sess.run([ass1,ass2,ass3,ass4,ass5,ass6,ass7,ass8,ass9,ass0,W1c,W2c,W3c,W4c,W5c])
这是我要在循环内执行操作的顺序:训练 C,更新 W1a……W5a,将 trans_arr1 更新到 trans_arr5,将 W1c 更新到 W5c,重复。
这是我已经尝试过的-
# Build the inter-network update ops ONCE, before the session/loop, and use
# tf.control_dependencies to enforce the required execution order inside a
# single sess.run() call:
#   phase 1: fold C's trained weights back into A and B,
#   phase 2: only then draw fresh random mixing masks,
#   phase 3: only then rebuild C as a mask-weighted mix of the new A and B.
# Without these dependencies the 20 fetched ops run in an arbitrary
# interleaving, so phase 3 can read W*a / trans_arr* before (or while) they
# are written -- which is what produced the wrong results.

# Phase 1: Wa <- trans*Wc + (1-trans)*Wa, likewise for Wb, per layer.
ass1=(W1a.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1a,tf.subtract(tf.ones([784, L]),trans_arr1)))))
ass2=(W1b.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1)))))
ass3=(W2a.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2a,tf.subtract(tf.ones([L,M]),trans_arr2)))))
ass4=(W2b.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2)))))
ass5=(W3a.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3a,tf.subtract(tf.ones([M,N]),trans_arr3)))))
ass6=(W3b.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3)))))
ass7=(W4a.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4a,tf.subtract(tf.ones([N,O]),trans_arr4)))))
ass8=(W4b.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4)))))
ass9=(W5a.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5a,tf.subtract(tf.ones([O,10]),trans_arr5)))))
ass0=(W5b.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5)))))
# Phase 2: refresh the mixing masks only after A and B have been updated.
# NOTE(review): genRandMat must return a TF random op (not a plain numpy
# array evaluated at graph-build time), otherwise the very same mask is
# re-assigned on every iteration -- verify.
with tf.control_dependencies([ass1,ass2,ass3,ass4,ass5,ass6,ass7,ass8,ass9,ass0]):
    t1=trans_arr1.assign(genRandMat(784,L,ptrans))
    t2=trans_arr2.assign(genRandMat(L,M,ptrans))
    t3=trans_arr3.assign(genRandMat(M,N,ptrans))
    t4=trans_arr4.assign(genRandMat(N,O,ptrans))
    t5=trans_arr5.assign(genRandMat(O,10,ptrans))
# Phase 3: Wc <- trans*Wa + (1-trans)*Wb with the updated A, B and masks.
# Creating the value tensors under this control-dependency scope guarantees
# the variable reads happen after phases 1 and 2 have completed.
with tf.control_dependencies([t1,t2,t3,t4,t5]):
    a1=W1c.assign(tf.add(tf.multiply(W1a, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1))))
    a2=W2c.assign(tf.add(tf.multiply(W2a, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2))))
    a3=W3c.assign(tf.add(tf.multiply(W3a, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3))))
    a4=W4c.assign(tf.add(tf.multiply(W4a, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4))))
    a5=W5c.assign(tf.add(tf.multiply(W5a, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5))))
# init: create the session and initialise all variables once, after every
# variable and update op above has been defined.
sess=tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Run one optimisation step of network C on a fresh 100-image batch.

    i -- global step index; fed into the `step` placeholder (presumably
         driving the learning-rate schedule `lr` -- TODO confirm).
    update_test_data -- if truthy, evaluate and print test accuracy/loss.
    update_train_data -- if truthy, evaluate and print train accuracy/loss.
    """
    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)
    # compute training values for visualisation
    if update_train_data:
        a, c, l = sess.run([accuracy, cross_entropy, lr],
                           feed_dict={X: batch_X, Y_: batch_Y, step: i})
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c) + " (lr:" + str(l) + ")")
    # compute test values for visualisation
    if update_test_data:
        a, c = sess.run([accuracy, cross_entropy],
                        feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
    # the backpropagation training step
    sess.run(train_step, {X: batch_X, Y_: batch_Y, step: i})
epoch=1
# Run (600 * epochs) + 1 mini-batches of 100 images each.
for i in range((epoch*600)+1):
    training_step(i, i % 100 == 0, i % 20 == 0)
    # NOTE(review): a single sess.run() gives NO ordering guarantee among the
    # fetched ops -- the assigns to W*a/W*b (ass*), the mask refresh (t*) and
    # the rebuild of W*c (a*) can interleave arbitrarily here.  The intended
    # order (update A,B -> new masks -> rebuild C) must be enforced with
    # tf.control_dependencies where the ops are defined; as written, this
    # races and explains the wrong results reported below.
    sess.run([ass1,ass2,ass3,ass4,ass5,ass6,ass7,ass8,ass9,ass0,t1,t2,t3,t4,t5,a1,a2,a3,a4,a5])
这比以前快了很多,但给出了错误的结果(出于某种原因,A 完全没有得到训练,而 B 的表现则非常奇怪)。还请注意,这是我要在循环内执行操作的顺序:训练 C,更新 W1a……W5a,将 trans_arr1 更新到 trans_arr5,将 W1c 更新到 W5c,重复。