由于计算图的扩展-Tensorflow中每次迭代的运行时间增加-优化策略

时间:2019-05-29 12:50:17

标签: tensorflow

(注意-TensorFlow的新手)

(代码在做什么-可选) 因此,我一直在尝试创建一个基本上可以做到这一点的模型-您有两个随机初始化的网络A,B和一个网络C(都具有相同的体系结构),它们的参数是A和B的混合。在训练C之后,我必须根据更新后的C的权重更新A和B的权重,然后再次将C的参数初始化为A和B的混合,对其进行训练-循环继续进行。

问题 - 我是在创建会话之后用 tf 操作(tf ops)来更新参数的,这导致每次迭代后计算时间不断增加(因为计算图在不断扩展)。

参考- 1. Tensorflow: run time increases per iteration

  1. https://groups.google.com/a/tensorflow.org/forum/#!topic/discuss/jIoA5NuhBNI

我尝试了第二个链接中的第一种方法,即为所有 tf 操作创建占位符,并通过 feed_dict 传入值。但是由于我需要更新的是张量,在 feed_dict 中传递张量 W1a、W1b 等会报错(因为不能把张量直接放进 feed_dict)。我也了解过 eager execution(即时执行),但由于现有代码的写法,启用它会报错。请注意,下面的代码可以工作并能完成我想要的功能,但我想知道是否有人可以提出优化策略。如果有人想看一下,这里是完整代码的链接-https://bitbucket.org/yashpandey4/so_tensorflow/commits/2e1bfee9c076d9af184a95133ca216b3392c6d27

暂时忽略training_step函数,然后看一下循环

#Set up the networks A,B,C, optimisers, loss functions etc

# init
# Create the session and run global variable initialization once, up front.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Run one optimization step on a fresh mini-batch of 100 MNIST images.

    Optionally evaluates and prints training-batch and/or test-set metrics
    before the backpropagation step.
    """
    # Draw the next mini-batch of 100 images with their labels.
    batch_X, batch_Y = mnist.train.next_batch(100)

    # Report metrics on the current training batch when requested.
    if update_train_data:
        acc, loss, lrate = sess.run(
            [accuracy, cross_entropy, lr],
            feed_dict={X: batch_X, Y_: batch_Y, step: i})
        print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(loss) + " (lr:" + str(lrate) + ")")

    # Report metrics on the full test set when requested.
    if update_test_data:
        acc, loss = sess.run(
            [accuracy, cross_entropy],
            feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(acc) + " test loss: " + str(loss))

    # Finally, apply one backpropagation update on this mini-batch.
    sess.run(train_step, {X: batch_X, Y_: batch_Y, step: i})



# NOTE(review): every tf.* call inside this loop CREATES NEW GRAPH NODES.
# Because the assign/add/multiply ops are rebuilt on each iteration, the
# computation graph grows without bound -- this is exactly why per-iteration
# run time keeps increasing.  These ops should be constructed once, before
# the loop (see the second variant further down in this post).
epoch=1
for i in range((epoch*600)+1):
    # Train C for one step (and periodically print metrics).
    training_step(i, i % 100 == 0, i % 20 == 0)

    # Update weights of A,B based on new weights of C
    # Each assign computes W := C*mask + W*(1-mask), i.e. it copies the
    # masked entries of C into A (resp. B) and keeps the rest unchanged.
    ass1=(W1a.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1a,tf.subtract(tf.ones([784, L]),trans_arr1)))))
    ass2=(W1b.assign(tf.add(tf.multiply(W1c, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1)))))
    ass3=(W2a.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2a,tf.subtract(tf.ones([L,M]),trans_arr2)))))
    ass4=(W2b.assign(tf.add(tf.multiply(W2c, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2)))))
    ass5=(W3a.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3a,tf.subtract(tf.ones([M,N]),trans_arr3)))))
    ass6=(W3b.assign(tf.add(tf.multiply(W3c, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3)))))
    ass7=(W4a.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4a,tf.subtract(tf.ones([N,O]),trans_arr4)))))
    ass8=(W4b.assign(tf.add(tf.multiply(W4c, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4)))))
    ass9=(W5a.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5a,tf.subtract(tf.ones([O,10]),trans_arr5)))))
    ass0=(W5b.assign(tf.add(tf.multiply(W5c, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5)))))

    # Update weights of C based on new weights of A,B
    # genRandMat presumably draws a fresh random mixing mask of the given
    # shape with parameter ptrans -- TODO confirm against its definition.
    trans_arr1 = genRandMat(784,L,ptrans)
    trans_arr2 = genRandMat(L,M,ptrans)
    trans_arr3 = genRandMat(M,N,ptrans)
    trans_arr4 = genRandMat(N,O,ptrans)
    trans_arr5 = genRandMat(O,10,ptrans)
    # NOTE(review): rebinding W1c..W5c to brand-new tensors here only changes
    # the Python names; the previously built training graph still references
    # the original W1c..W5c objects.  Only the assign ops rebuilt on the next
    # iteration see these new tensors.
    W1c = tf.add(tf.multiply(W1a, trans_arr1),tf.multiply(W1b,tf.subtract(tf.ones([784, L]),trans_arr1)))
    W2c = tf.add(tf.multiply(W2a, trans_arr2),tf.multiply(W2b,tf.subtract(tf.ones([L,M]),trans_arr2)))
    W3c = tf.add(tf.multiply(W3a, trans_arr3),tf.multiply(W3b,tf.subtract(tf.ones([M,N]),trans_arr3)))
    W4c = tf.add(tf.multiply(W4a, trans_arr4),tf.multiply(W4b,tf.subtract(tf.ones([N,O]),trans_arr4)))
    W5c = tf.add(tf.multiply(W5a, trans_arr5),tf.multiply(W5b,tf.subtract(tf.ones([O,10]),trans_arr5)))

    # Fetching W1c..W5c only evaluates them; the A/B updates happen via the
    # assign ops.  Fetch order within a single run() carries no execution-order
    # guarantee between the listed ops.
    sess.run([ass1,ass2,ass3,ass4,ass5,ass6,ass7,ass8,ass9,ass0,W1c,W2c,W3c,W4c,W5c])

这是我希望在循环内执行操作的顺序:训练 C,更新 W1a……W5a,更新 trans_arr1 至 trans_arr5,更新 W1c 至 W5c,如此循环。

这是我已经尝试过的-

def _mix_assign(dst, masked, unmasked, mask, shape):
    """Build a graph op computing dst := masked*mask + unmasked*(ones(shape)-mask)."""
    inv = tf.subtract(tf.ones(shape), mask)
    return dst.assign(tf.add(tf.multiply(masked, mask), tf.multiply(unmasked, inv)))

# Pull C's masked weights back into A and B (built ONCE, outside the loop).
ass1 = _mix_assign(W1a, W1c, W1a, trans_arr1, [784, L])
ass2 = _mix_assign(W1b, W1c, W1b, trans_arr1, [784, L])
ass3 = _mix_assign(W2a, W2c, W2a, trans_arr2, [L, M])
ass4 = _mix_assign(W2b, W2c, W2b, trans_arr2, [L, M])
ass5 = _mix_assign(W3a, W3c, W3a, trans_arr3, [M, N])
ass6 = _mix_assign(W3b, W3c, W3b, trans_arr3, [M, N])
ass7 = _mix_assign(W4a, W4c, W4a, trans_arr4, [N, O])
ass8 = _mix_assign(W4b, W4c, W4b, trans_arr4, [N, O])
ass9 = _mix_assign(W5a, W5c, W5a, trans_arr5, [O, 10])
ass0 = _mix_assign(W5b, W5c, W5b, trans_arr5, [O, 10])

# Redraw the random mixing masks.
t1 = trans_arr1.assign(genRandMat(784, L, ptrans))
t2 = trans_arr2.assign(genRandMat(L, M, ptrans))
t3 = trans_arr3.assign(genRandMat(M, N, ptrans))
t4 = trans_arr4.assign(genRandMat(N, O, ptrans))
t5 = trans_arr5.assign(genRandMat(O, 10, ptrans))

# Recombine A and B into C: C := A*mask + B*(1-mask).
a1 = _mix_assign(W1c, W1a, W1b, trans_arr1, [784, L])
a2 = _mix_assign(W2c, W2a, W2b, trans_arr2, [L, M])
a3 = _mix_assign(W3c, W3a, W3b, trans_arr3, [M, N])
a4 = _mix_assign(W4c, W4a, W4b, trans_arr4, [N, O])
a5 = _mix_assign(W5c, W5a, W5b, trans_arr5, [O, 10])


# init
# Open the session, then initialize every graph variable exactly once.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Perform one training step of network C on a 100-image mini-batch.

    When the corresponding flag is set, also prints metrics for the
    current training batch and/or the whole test set.
    """
    # Fetch the next 100 images and matching labels.
    batch_X, batch_Y = mnist.train.next_batch(100)

    # Training-batch metrics (for visualisation).
    if update_train_data:
        acc, loss, lrate = sess.run(
            [accuracy, cross_entropy, lr],
            feed_dict={X: batch_X, Y_: batch_Y, step: i})
        print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(loss) + " (lr:" + str(lrate) + ")")

    # Test-set metrics (for visualisation).
    if update_test_data:
        acc, loss = sess.run(
            [accuracy, cross_entropy],
            feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(acc) + " test loss: " + str(loss))

    # One backpropagation update on this mini-batch.
    sess.run(train_step, {X: batch_X, Y_: batch_Y, step: i})



epoch = 1
for i in range((epoch*600)+1):
    # Train C for one step (and periodically print metrics).
    training_step(i, i % 100 == 0, i % 20 == 0)

    # BUG FIX: a single sess.run([...]) makes NO guarantee about the order in
    # which the fetched ops execute, so the A/B updates could observe the
    # already-redrawn masks or the already-recombined C -- which explains the
    # wrong results (A effectively never receiving C's trained weights).
    # Enforce the intended order -- update A/B from C, then redraw the masks,
    # then rebuild C from A/B -- with three separate, sequential run() calls.
    # (tf.control_dependencies on pre-built ops would achieve the same.)
    sess.run([ass1, ass2, ass3, ass4, ass5, ass6, ass7, ass8, ass9, ass0])
    sess.run([t1, t2, t3, t4, t5])
    sess.run([a1, a2, a3, a4, a5])

这比以前快了很多,但给出了错误的结果(出于某种原因,A 根本没有得到训练,而 B 的表现非常奇怪)。另外请注意,这是我希望在循环内执行操作的顺序:训练 C,更新 W1a……W5a,更新 trans_arr1 至 trans_arr5,更新 W1c 至 W5c,如此循环。

0 个答案:

没有答案