I have a strange problem with TensorFlow initialization. I have a computation graph that produces some results, and if I add a constant that is not actually connected to the input/output flow I am testing, I get different results even though the random_seed is the same. Any hints? Here is the code, along with how to reproduce the issue:
import collections
import logging
import sys

import tensorflow as tf

BATCH_SIZE = None
SENTENCE_LENGTH = None
FORMULA_LENGTH = None
VOCABULARY_SIZE = None
SWITCH_SIZE = None
EMISSION_SIZE = None
POINTING_SIZE = None
EMBEDDING_SIZE = None
HIDDEN_SIZE = None


class Model(object):
    """Base model."""

    def __init__(self, trainable=True):
        """Initialize a model instance."""
        self._trainable = trainable
        input_shape = [BATCH_SIZE, None]
        output_shape = [BATCH_SIZE, None]
        self._words = tf.placeholder(dtype=tf.int32, shape=input_shape)
        self._switch = tf.placeholder(dtype=tf.int32, shape=output_shape)
        self._emission = tf.placeholder(dtype=tf.int32, shape=output_shape)
        self._mask = tf.placeholder(dtype=tf.float32, shape=output_shape)
        self._sentence_length = tf.placeholder(
            dtype=tf.int32, shape=[BATCH_SIZE])
        self._formula_length = tf.placeholder(
            dtype=tf.int32, shape=[BATCH_SIZE])
        with tf.device('CPU:0'):
            self._gs = tf.get_variable(
                'global_step', dtype=tf.int32, initializer=0)
        with tf.variable_scope('Embedding') as scope:
            with tf.device('CPU:0'):
                embeddings_shape = [VOCABULARY_SIZE, EMBEDDING_SIZE]
                embeddings = tf.get_variable('E', embeddings_shape)
            inputs = tf.nn.embedding_lookup(embeddings, self._words)
        with tf.variable_scope('Encoder') as scope:
            enc_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
            enc_state = enc_cell.zero_state(BATCH_SIZE, tf.float32)
            enc_output, _ = tf.nn.dynamic_rnn(
                enc_cell, inputs,
                sequence_length=self._sentence_length,
                initial_state=enc_state, scope=scope)
            slices = []
            for index, length in enumerate(tf.unpack(self._sentence_length)):
                slice_ = tf.slice(enc_output,
                                  begin=[index, length - 1, 0],
                                  size=[1, 1, HIDDEN_SIZE])
                slices.append(slice_)
            last_enc_out = tf.concat(0, slices)
        with tf.variable_scope('Decoder') as scope:
            self._ll = tf.shape(self._switch)[1]
            dec_inputs = tf.tile(last_enc_out, [1, self._ll, 1])  # <- HERE!
            dec_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
            dec_state = dec_cell.zero_state(BATCH_SIZE, tf.float32)
            dec_output, _ = tf.nn.dynamic_rnn(
                dec_cell, dec_inputs,
                sequence_length=self._formula_length,
                initial_state=dec_state, scope=scope)
        with tf.variable_scope('Switch') as scope:
            switch_input = tf.reshape(dec_output, [-1, HIDDEN_SIZE])
            switch_w = tf.get_variable('W', [HIDDEN_SIZE, SWITCH_SIZE])
            switch_b = tf.get_variable('b', [SWITCH_SIZE])
            switch_logits = tf.matmul(switch_input, switch_w) + switch_b
            switch_logits = tf.reshape(
                switch_logits, [BATCH_SIZE, -1, SWITCH_SIZE])
            self._switch_probs = tf.nn.softmax(switch_logits, dim=-1)
            self._switch_predict = tf.argmax(self._switch_probs, axis=2)
            switch_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                switch_logits, self._switch)
            # to have a different initialization in Emission/W
            # and Emission/b, UNCOMMENT THE FOLLOWING LINE
            # switch_actual_one = tf.equal(self._switch, 1)
            switch_losses = self._mask * switch_losses
            self._switch_loss = tf.reduce_sum(switch_losses)
        with tf.variable_scope('Emission') as scope:
            emission_input = tf.reshape(dec_output, [-1, HIDDEN_SIZE])
            emission_w = tf.get_variable('W', [HIDDEN_SIZE, EMISSION_SIZE])
            emission_b = tf.get_variable('b', [EMISSION_SIZE])
            emission_logits = tf.matmul(emission_input, emission_w) + emission_b
            emission_logits = tf.reshape(
                emission_logits, [BATCH_SIZE, -1, EMISSION_SIZE])
            self._emission_probs = tf.nn.softmax(emission_logits, dim=-1)
            self._emission_predict = tf.argmax(self._emission_probs, axis=2)
            emission_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                emission_logits, self._emission)
            # mask and sum the losses
            emission_losses = self._mask * emission_losses
            self._emission_loss = tf.reduce_sum(emission_losses)
        self._loss = self._switch_loss + self._emission_loss
        if trainable:
            with tf.variable_scope('Backprop') as scope:
                optimizer = tf.train.AdadeltaOptimizer(
                    learning_rate=0.005, rho=0.95, epsilon=1e-6)
                self._train_op = optimizer.minimize(
                    self._loss, self._gs, colocate_gradients_with_ops=True)
        self._variables = tf.global_variables()
        for var in self._variables:
            logging.debug(var.name + ' -- ' + var.device)


if __name__ == '__main__':
    BATCH_SIZE = 1
    SENTENCE_LENGTH = 8
    FORMULA_LENGTH = 5
    VOCABULARY_SIZE = 10
    SWITCH_SIZE = 2
    EMISSION_SIZE = 5
    POINTING_SIZE = 8
    EMBEDDING_SIZE = 5
    HIDDEN_SIZE = 10
    tf.reset_default_graph()
    tf.set_random_seed(23)
    INSTANCE = Model(len(sys.argv) > 1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for v in INSTANCE._variables:
            print v.name + ': ' + str(v.get_shape())
            data = sess.run(v)
            print data
Put the code in a model.py module and run it:
:~$ python model.py > ref.txt
Now uncomment the line indicated above in the code and run it again:
:~$ python model.py > mod.txt
:~$ diff ref.txt mod.txt
You should see:
272,281c272,281
< [[ 0.15272647 -0.43774983 -0.43797731 -0.24265891 0.32974124]
< [ 0.00733912 -0.07849193 -0.43016246 -0.48970658 0.19110513]
< [ 0.02099371 -0.38966972 -0.49712265 0.46021473 -0.04306412]
< [ 0.40600681 -0.53819847 0.23261058 0.19120842 -0.16637936]
< [-0.30048463 0.23391896 0.28050232 0.46709561 -0.50859016]
< [-0.01186764 -0.4364894 -0.45374861 -0.18917233 -0.29747942]
< [ 0.37092978 0.28236824 -0.32479095 0.19327551 0.13849157]
< [ 0.16678107 -0.4084509 0.05273259 0.0044086 0.18909204]
< [ 0.45275509 0.11965656 -0.21034014 0.26717472 -0.1948047 ]
< [-0.21880585 -0.24638626 0.49764216 0.21117538 -0.14970052]]
---
> [[ 0.02334166 -0.53902954 0.49798179 -0.09884384 -0.21991399]
> [ 0.16418862 -0.43838659 0.02976686 0.32583421 -0.43007952]
> [ 0.03031725 -0.10049745 -0.14239612 -0.40359342 -0.23511672]
> [ 0.10826033 0.1075694 0.0359624 0.38421327 0.22342587]
> [-0.19174138 0.27111518 -0.31547174 0.1219362 0.43017918]
> [ 0.37019521 0.40931159 -0.23115548 -0.46872306 -0.23347196]
> [-0.36434412 -0.33802703 0.33611691 0.42583901 0.17518502]
> [-0.01159394 -0.31617939 -0.29969472 -0.01011997 0.53662634]
> [ 0.53476465 -0.02670521 0.46675134 0.10126555 0.20955974]
> [-0.29040447 0.16516399 0.52025235 -0.5020656 -0.40593004]]
283c283
< [-0.16011143 0.4890095 1.21327794 -0.48182666 0.34403455]
---
> [-0.23730683 -0.43528473 -1.56616378 0.87664163 -0.72844386]
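For what it's worth, here is a much smaller sketch of what I suspect is going on (this is only my assumption about the mechanism, not something I have verified in the TensorFlow sources): when only the graph-level seed is set, each random initializer seems to get an op-level seed derived from the order in which ops are added to the graph, so creating one extra op, even a completely disconnected one, can shift the seeds of every variable created after it.

import tensorflow as tf

tf.reset_default_graph()
tf.set_random_seed(23)

# Uncommenting the next line adds one extra, unused op to the graph
# before 'v' is created; my guess is that this shifts the op-derived
# seed of v's initializer and therefore changes its initial values.
# _unused = tf.equal(tf.constant(1), tf.constant(1))

v = tf.get_variable('v', shape=[3, 3])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print v.name + ': ' + str(v.get_shape())
    print sess.run(v)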