Changing the dtype of the GRU, input, and initial_state changes the values of the backpropagation gradients in tensorflow

Time: 2018-07-26 13:12:08

Tags: tensorflow backpropagation rnn

I have the following code in tensorflow, in which I am trying to compute the backpropagation gradients through a GRU.


```python
import tensorflow as tf
import numpy as np

cell_size = 32

seq_length = 1000

time_steps1 = 500
time_steps2 = seq_length - time_steps1

x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)

tf.set_random_seed(123)

m_dtype = tf.float32

input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")

labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")

labels = tf.concat([labels1, labels2], axis=1, name="labels")

def model(input_feat1, input_feat2):
    with tf.variable_scope("GRU"):
        cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
        cell2 = tf.nn.rnn_cell.GRUCell(cell_size)

        initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")

        with tf.variable_scope("First50"):
            # output1: shape=[1, time_steps1, 32]
            output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)

        with tf.variable_scope("Second50"):
            # output2: shape=[1, time_steps2, 32]
            output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)

        with tf.variable_scope("output"):
            # output shape: [1, time_steps1 + time_steps2, 32] => [1, 1000, 32]
            output = tf.concat([output1, output2], axis=1)

            output = tf.reshape(output, shape=[-1, cell_size])
            output = tf.layers.dense(output, units=1)
            output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])

        with tf.variable_scope("outputs_1_2_reshaped"):
            output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
            output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])

            print(output.get_shape().as_list(), "1")
            print(output1.get_shape().as_list(), "2")
            print(output2.get_shape().as_list(), "3")

            return output, output1, output2, initial_state, new_state1, new_state2

def loss(output, output1, output2, labels, labels1, labels2):
    # tf.sqrt(tf.square(.)) is effectively an elementwise absolute value
    loss = tf.reduce_sum(tf.sqrt(tf.square(output - labels)))
    loss1 = tf.reduce_sum(tf.sqrt(tf.square(output1 - labels1)))
    loss2 = tf.reduce_sum(tf.sqrt(tf.square(output2 - labels2)))
    return loss, loss1, loss2

def optimize(loss, loss1, loss2, initial_state, new_state1, new_state2):
    with tf.name_scope('Optimizer'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

            grads1 = tf.gradients(loss2, new_state1)
            grads2 = tf.gradients(loss1, initial_state)
            grads3 = tf.gradients(new_state1, initial_state, grad_ys=grads1)

            grads_wrt_initial_state_1 = tf.add(grads2, grads3)
            grads_wrt_initial_state_2 = tf.gradients(loss, initial_state, grad_ys=None)

    return grads_wrt_initial_state_1, grads_wrt_initial_state_2

output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)

loss, loss1, loss2 = loss(output, output1, output2, labels, labels1, labels2)

grads_wrt_initial_state_1, grads_wrt_initial_state_2 = optimize(loss, loss1, loss2, initial_state, new_state1, new_state2)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
    in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))

    l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
    l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))

    i_s = np.zeros([1, cell_size])

    t1, t2 = sess.run([grads_wrt_initial_state_1, grads_wrt_initial_state_2],
                      feed_dict={input_1: in1, input_2: in2, labels1: l1, labels2: l2, initial_state: i_s})

    print(np.mean(t1), np.mean(t2))
    print(np.sum(t1), np.sum(t2))
```

So, just changing the dtype at the beginning of the code from tf.float64 to tf.float32 changes the whole values of the backpropagation gradients in tensorflow.
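The effect can be reproduced in isolation. Below is a minimal sketch of my own (not from the original code): the constant initializers, the 50-step length, and the 4-unit cell are assumptions I chose so that both graphs get identical weights, leaving floating-point precision as the only thing that differs between the two runs.

```python
import numpy as np
import tensorflow as tf

def grad_wrt_initial_state(m_dtype):
    """Build the same tiny GRU graph for the given dtype and return
    d(loss)/d(initial_state). Constant initializers keep the weights
    identical across dtypes, so only the precision varies."""
    tf.reset_default_graph()
    cell = tf.nn.rnn_cell.GRUCell(
        4,
        kernel_initializer=tf.constant_initializer(0.1),
        bias_initializer=tf.constant_initializer(0.0))
    x = tf.placeholder(dtype=m_dtype, shape=[1, 50, 1])
    init_state = tf.placeholder(dtype=m_dtype, shape=[1, 4])
    outputs, _ = tf.nn.dynamic_rnn(cell, x, initial_state=init_state)
    loss = tf.reduce_sum(tf.square(outputs))
    grad = tf.gradients(loss, init_state)[0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run(grad, feed_dict={
            x: np.linspace(0.0, 1.0, 50).reshape(1, 50, 1),
            init_state: np.zeros([1, 4]),
        })

g32 = grad_wrt_initial_state(tf.float32)
g64 = grad_wrt_initial_state(tf.float64)
# Small but nonzero difference: pure accumulated rounding error.
print(np.abs(g32.astype(np.float64) - g64).max())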

So I am wondering why this happens, and whether I should be using tf.float64 or tf.float32.
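For scale, the per-operation relative rounding error of each dtype can be checked directly; over 1000 recurrent steps these per-step errors compound through the gate nonlinearities, which may explain the large end-to-end difference. A quick check of my own (not from the original post):

```python
import numpy as np

# Relative rounding error per floating-point operation for each dtype.
print(np.finfo(np.float32).eps)  # ~1.19e-07 (about 7 decimal digits)
print(np.finfo(np.float64).eps)  # ~2.22e-16 (about 16 decimal digits)
```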

0 Answers:

There are no answers yet.