What is the effect of unused operations in TensorFlow?

Time: 2017-02-26 12:37:30

Tags: tensorflow

I built a graph in TensorFlow:

  with tf.variable_scope('embedding'):
      self.embedding = tf.Variable(self.data.embedding, trainable=True, name="emb", dtype=self.float_type)

This word embedding is then fed into a neural network model, and the results look fine. But I want to add another embedding, for example for POS tags, which I add like this:

  with tf.variable_scope('pos_embedding'):
      # name-plus-shape creation needs get_variable; tf.Variable takes an initial value
      self.pos_embedding = tf.get_variable("emb", [self.data.pnum, self.option.pos_size])

The two embeddings are concatenated into a new embedding. What puzzles me is this: I only define the pos embedding as above and never use it in the model, so the model's input is still just the word embedding, yet adding the code above changes the results a lot. As I understand it, if I never feed self.pos_embedding into any real computation, its operations should never be executed. But the results change significantly.
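Here is a minimal sketch (my own reduction, using the same TF 0.12/1.x-era API as my code) of what I suspect is going on: tf.set_random_seed only fixes the graph-level seed, and each random op's effective seed is then derived from its position in the graph. Any extra node, even one that is never run, therefore shifts the seeds of the random ops created after it. The variable name w and its shape are just illustrative:

    import tensorflow as tf

    def build(insert_unused_node):
        tf.reset_default_graph()
        tf.set_random_seed(123)
        if insert_unused_node:
            unused = tf.constant(0)  # never evaluated, but still consumes an op id
        # without an explicit op-level seed, the initializer's seed is derived
        # from the graph seed plus the current op count
        w = tf.get_variable("w", [3],
                            initializer=tf.random_uniform_initializer(-0.08, 0.08))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            return sess.run(w)

    print(build(False))  # one set of initial values
    print(build(True))   # different initial values, although `unused` never runs

If this is the mechanism, the unused node is not executed at all; it only changes which random numbers the initializers draw.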

Below is my model-building function. Even adding only the single line self.test_variable = tf.constant(0) changes the value of the loss function. Why does that line change the loss?

def __build_model(self):
    tf.reset_default_graph()
    tf.set_random_seed(123)
    # inputs: padded word-id sequences and their true lengths
    self.encoder_inputs = tf.placeholder(shape=[None, self.data.max_x], dtype=self.int_type)
    self.encoder_length = tf.placeholder(shape=[None], dtype=self.int_type)
    self.decoder_inputs = []  # not used below
    self.decoder_targets = []
    self.target_weights = []
    self.keep_prob = tf.placeholder(self.float_type)
    # one target / weight placeholder per output time step
    for i in range(self.data.max_y):
        self.decoder_targets.append(tf.placeholder(self.int_type, [None], name='decoder_target%d' % i))
        self.target_weights.append(tf.placeholder(self.float_type, [None], name='target_weight%d' % i))
    with tf.variable_scope('embedding'):
        self.embedding = tf.Variable(self.data.embedding, trainable=True, name="emb", dtype=self.float_type)
        self.test_variable = tf.constant(0)  # adding only this line changes the loss
    self.x_emb = tf.nn.embedding_lookup(self.embedding, self.encoder_inputs)
    # stacked LSTM with dropout, shared between the two directions
    cell = tf.nn.rnn_cell.LSTMCell(self.option.rnn_size,
                                   initializer=tf.random_uniform_initializer(-0.08, 0.08),
                                   state_is_tuple=True)
    dropout_cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell([dropout_cell] * self.option.num_layers, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=stacked_cell, cell_bw=stacked_cell,
                                                      dtype=self.float_type,
                                                      sequence_length=self.encoder_length,
                                                      inputs=self.x_emb)
    output_fw, output_bw = outputs
    output = tf.concat(2, [output_fw, output_bw])  # join forward and backward outputs
    soft_dim = self.option.rnn_size * 2
    self.softmax_w = tf.get_variable("softmax_w", [soft_dim, self.num_classes])
    self.softmax_b = tf.get_variable("softmax_b", [self.num_classes])
    output = tf.reshape(output, [-1, soft_dim])
    self.logits = tf.matmul(output, self.softmax_w) + self.softmax_b

    # for prediction
    self.decode_outputs_test = tf.nn.softmax(self.logits)
    self.decode_outputs_test = tf.reshape(self.decode_outputs_test, [-1, self.data.max_x, self.num_classes])

    # for training: reshape to [time, batch, classes] and split per time step
    self.classify_out = tf.reshape(self.logits, [-1, self.data.max_x, self.num_classes])
    self.logits2 = tf.transpose(self.classify_out, [1, 0, 2])
    self.logits3 = tf.unpack(self.logits2, axis=0)
    self.loss = tf.nn.seq2seq.sequence_loss(self.logits3, self.decoder_targets, self.target_weights, self.num_classes)
    self.train_op = tf.train.AdamOptimizer(learning_rate=self.option.learning_rate).minimize(self.loss)
    print('model built')
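One check I can think of (an assumption on my part, not something I have verified): give every random initializer an explicit op-level seed, so the seed no longer depends on how many ops were created earlier in the graph. A sketch of that change to the function above, with arbitrary seed values:

    # pin op-level seeds so initial values do not depend on graph-construction
    # order; the seed values 42, 11 and 7 are arbitrary
    init = tf.random_uniform_initializer(-0.08, 0.08, seed=42)
    cell = tf.nn.rnn_cell.LSTMCell(self.option.rnn_size, initializer=init,
                                   state_is_tuple=True)
    dropout_cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob,
                                                 seed=11)
    self.softmax_w = tf.get_variable(
        "softmax_w", [soft_dim, self.num_classes],
        initializer=tf.random_uniform_initializer(-0.05, 0.05, seed=7))

If the loss then stays identical with and without self.test_variable, that would confirm the unused node only shifts initializer seeds and does no work of its own.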

0 Answers:

No answers yet.