Strange error when taking gradients with a TensorArray

Time: 2016-04-23 05:41:20

Tags: tensorflow

I am using the code pasted below. The "forward" part of the code seems to work: it passes the assertion assert root_emb == 1 + emb[0] * emb[1]. However, as soon as a training step is taken (the lines after the assertion), a strange error appears, indicating a problem with the TensorArray writes performed during the loop.

tensorflow.python.framework.errors.InvalidArgumentError: TensorArray TensorArray@gradient: Could not read from TensorArray index 2 because it has not yet been written to.
     [[Node: gradients/while/TensorArrayWrite_grad/TensorArrayRead = TensorArrayRead[_class=["loc:@TensorArray"], dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad, gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop, gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]]
Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead', defined at:
  File "minimal.py", line 82, in <module>
    model = TreeRNN(8, 1, 1, degree=2)
  File "minimal.py", line 61, in __init__
    self.grad = tf.gradients(self.loss, self.params)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py", line 481, in gradients
    in_grads = _AsList(grad_fn(op, *out_grads))
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py", line 115, in _TensorArrayWriteGrad
    grad = g.read(index)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 177, in read
    dtype=self._dtype, name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 781, in _tensor_array_read
    flow_in=flow_in, dtype=dtype, name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op
    op_def=op_def)
  File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 2154, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 1154, in __init__
    self._traceback = _extract_stack()

...which was originally created as op u'while/TensorArrayWrite', defined at:
  File "minimal.py", line 82, in <module>
    model = TreeRNN(8, 1, 1, degree=2)
  File "minimal.py", line 50, in __init__
    loop_vars=(self.time, node_emb, tf.zeros([1])))
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1681, in While
    back_prop=back_prop, swap_memory=swap_memory, name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1671, in while_loop
    result = context.BuildLoop(cond, body, loop_vars)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1572, in BuildLoop
    body_result = body(*vars_for_body_with_tensor_arrays)
  File "minimal.py", line 43, in _recurrence
    new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 200, in write
    name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 875, in _tensor_array_write
    value=value, flow_in=flow_in, name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op
    op_def=op_def)

The minimal reproduction (minimal.py):

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import tensor_array_ops, control_flow_ops


class TreeRNN(object):

    def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01):
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.output_dim = output_dim
        self.degree = degree
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)

        self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
        self.recursive_unit = self.create_recursive_unit()
        self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim]))
        self.b_out = tf.Variable(self.init_vector([self.output_dim]))

        self.x = tf.placeholder(tf.int32, shape=[None])  # word indices
        self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1])
        self.y = tf.placeholder(tf.float32, shape=[self.output_dim])

        num_words, = tf.unpack(tf.shape(self.x), 1)  # also num leaves
        emb_x = tf.gather(self.embeddings, self.x)
        node_emb = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=num_words - 1, dynamic_size=True,
            clear_after_read=False)
        node_emb = node_emb.unpack(emb_x)

        num_nodes, _ = tf.unpack(tf.shape(self.tree), 2)  # num internal nodes
        tree_traversal = tensor_array_ops.TensorArray(
            dtype=tf.int32, size=num_nodes)
        tree_traversal = tree_traversal.unpack(self.tree)

        def _recurrence(t, node_emb, _):
            node_info = tree_traversal.read(t)
            children_and_parent = tf.unpack(node_info, self.degree + 1)
            child_emb = []
            for i in xrange(self.degree):
                child_emb.append(node_emb.read(children_and_parent[i]))
            parent_emb = self.recursive_unit(child_emb)
            new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
            return t + 1, new_node_emb, parent_emb

        self.time = tf.constant(0, dtype=tf.int32, name='time')
        _, _, final_emb = control_flow_ops.While(
            cond=lambda t, _1, _2: t < num_nodes,
            body=_recurrence,
            loop_vars=(self.time, node_emb, tf.zeros([1])))

        self.final_state = final_emb

        self.pred_y = self.activation(
            tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1]))
            + self.b_out)
        self.loss = self.loss_fn(self.y, self.pred_y)

        self.params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.grad = tf.gradients(self.loss, self.params)
        self.updates = opt.apply_gradients(zip(self.grad, self.params))

    def init_matrix(self, shape):
        return tf.random_normal(shape, stddev=0.1)

    def init_vector(self, shape):
        return tf.zeros(shape)

    def create_recursive_unit(self):
        def unit(child_emb):  # very simple
            return 1 + child_emb[0] * child_emb[1]
        return unit

    def activation(self, inp):
        return tf.sigmoid(inp)

    def loss_fn(self, y, pred_y):
        return tf.reduce_sum(tf.square(y - pred_y))


model = TreeRNN(8, 1, 1, degree=2)
sess = tf.Session()
sess.run(tf.initialize_all_variables())

root_emb = sess.run([model.final_state],
                    feed_dict={model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])})
emb, = sess.run([model.embeddings])
assert root_emb == 1 + emb[0] * emb[1]

out = sess.run([model.updates, model.loss],
               feed_dict={model.x: np.array([0, 1]),
                          model.tree: np.array([[0, 1, 2]]),
                          model.y: np.array([0])})

1 Answer:

Answer 0 (score: 0)

Set parallel_iterations=1 in tf.while_loop.
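
Applied to the loop in the question, the fix would look like the sketch below (this assumes control_flow_ops.While in this release forwards the parallel_iterations keyword to while_loop, the same way the public tf.while_loop accepts it):

_, _, final_emb = control_flow_ops.While(
    cond=lambda t, _1, _2: t < num_nodes,
    body=_recurrence,
    loop_vars=(self.time, node_emb, tf.zeros([1])),
    parallel_iterations=1)  # run the loop body strictly one iteration at a time

Serializing the iterations ensures each TensorArray index is written before the gradient pass tries to read the corresponding slot, which should avoid the "has not yet been written to" error.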