How does the reuse option in tf.variable_scope work?

Time: 2016-08-30 17:56:06

Tags: tensorflow

I have the following problem: I am writing a simple piece of code to learn how TensorFlow works, and I define the variables for the convolutions with the help of tf.variable_scope. However, whenever I try to run this script I get a ValueError telling me to set either reuse=None or reuse=True.

Can someone explain why the function won't run without this option being set, or what the solution is?

My code is:

import re
import tensorflow as tf
import numpy as np
data = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = np.zeros((16400,))
labels[10001:16400]=1
labels = labels.astype(int)
data = data.astype(np.float32)
#labels = tf.cast(labels,tf.int64)

MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1       # Initial learning rate.
NUM_CLASSES=2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN= 1000
batch_size=300

def _variable_on_cpu(name, shape, initializer):
  dtype = tf.float32
  var = tf.get_variable(name, shape, initializer = initializer, dtype = dtype)
  return var

def _add_loss_summaries(loss):
  """Add summaries for losses in CIFAR-10 model.

  Generates moving average for all losses and associated summaries for
  visualizing the performance of the network.

  Args:
    total_loss: Total loss from loss().
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  losses = tf.get_collection('losses')
  loss_averages_op = loss_averages.apply(losses + [loss])

  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    tf.scalar_summary(l.op.name +' (raw)', l)
    tf.scalar_summary(l.op.name, loss_averages.average(l))
  return loss_averages_op

def _variable_with_weight_decay(name, shape, stddev, wd):
  dtype = tf.float32 
  var = _variable_on_cpu(
      name,
      shape,
      tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
  if wd is not None:
    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var

def _activation_summary(x):
    tensor_name = re.sub('_[0-9]*/','', x.op.name) 
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

def iterate_batches(data, labels, batch_size, num_epochs):
  N = int(labels.shape[0])
  batches_per_epoch = int(N/batch_size)
  for i in range(num_epochs):
    for j in range(batches_per_epoch):
      start, stop = j*batch_size, (j+1)*batch_size
      yield data[start:stop,:,:,:], labels[start:stop]

def train():
  with tf.Graph().as_default():
      global_step = tf.Variable(0)
      x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000,1,1))
      y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))
      for x,y in iterate_batches(data,labels, 300,1):
        print('yey!')
        with tf.variable_scope('conv1',reuse=True) as scope:
          kernel = _variable_with_weight_decay('weights',
                                           shape=[100,1,1,64],
                                           stddev=5e-2,
                                           wd=0.0)
          conv = tf.nn.conv2d(x_tensor, kernel, [1,3,1,1], padding = 'SAME')
          biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
          bias = tf.nn.bias_add(conv, biases)
          conv1 = tf.nn.relu(bias, name=scope.name)
          _activation_summary(conv1)
          pool1 = tf.nn.max_pool(conv1, ksize=[1,20,1,1], strides=[1,2,1,1], padding='SAME', name='pool1')
          norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')

        with tf.variable_scope('conv2',reuse=True) as scope:
          kernel = _variable_with_weight_decay('weights', [50,1,64,64], stddev = 5e-2, wd=0.0)
          conv = tf.nn.conv2d(norm1, kernel, [1,3,1,1], padding='SAME')
          biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
          bias = tf.nn.bias_add(conv,biases)
          conv2 = tf.nn.relu(bias, name=scope.name)
          _activation_summary(conv2)
          norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm2')
          pool2 = tf.nn.max_pool(norm2, ksize=[1,10,1,1], strides=[1,2,1,1], padding='SAME', name='pool2')

        with tf.variable_scope('conv3',reuse=True) as scope:
          kernel = _variable_with_weight_decay('weights', [30,1,64,64], stddev = 5e-2, wd=0.0)
          conv = tf.nn.conv2d(pool2, kernel, [1,10,1,1], padding='SAME')
          biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
          bias = tf.nn.bias_add(conv,biases)
          conv3 = tf.nn.relu(bias, name=scope.name)
          _activation_summary(conv3)
          norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm3')
          pool3 = tf.nn.max_pool(norm3, ksize=[1,9,1,1], strides=[1,9,1,1], padding='SAME', name='pool3')

        with tf.variable_scope('fc4',reuse=True) as scope:
          # Move everything into depth so we can perform a single matrix multiply.
          reshape = tf.reshape(pool3, [batch_size, -1])
          dim = reshape.get_shape()[1].value
          weights = _variable_with_weight_decay('weights', shape=[dim, 64], stddev=0.04, wd=0.004)
          biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
          fc4 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
          _activation_summary(fc4)

        with tf.variable_scope('fc5',reuse=True) as scope:
          weights = _variable_with_weight_decay('weights', shape=[64, 64],
                                          stddev=0.04, wd=0.004)
          biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
          fc5 = tf.nn.relu(tf.matmul(fc4, weights) + biases, name=scope.name)
          _activation_summary(fc5)

        with tf.variable_scope('softmax_linear',) as scope:
          weights = _variable_with_weight_decay('weights', [64, NUM_CLASSES],
                                          stddev=1/64.0, wd=0.0)
          biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
          softmax_linear = tf.add(tf.matmul(fc5, weights), biases, name=scope.name)
          _activation_summary(softmax_linear)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(softmax_linear, y_tensor, name='cross_entropy_per_example')  
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        kupa = tf.add_to_collection('losses', cross_entropy_mean) 
        loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

        #neu
        num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
        loss_averages_op = _add_loss_summaries(loss) 
        summary_op = tf.merge_all_summaries()

    #neu
        init = tf.initialize_all_variables()
        sess = tf.Session(config = tf.ConfigProto(log_device_placement=False))
        sess.run(init)
        sess.run([conv, bias, conv1, pool1, norm1, conv2,norm2, pool2, conv3, norm3, pool3,fc4,fc5], feed_dict={x_tensor:x, y_tensor:y})
        sess.run([softmax_linear,loss], feed_dict={x_tensor:x, y_tensor:y})
        sess.run([lr, loss_averages_op, summary_op], feed_dict={x_tensor:x, y_tensor:y})

1 answer:

Answer 0 (score: 2)

The problem is with this line here:

  for x,y in iterate_batches(data,labels, 300,1):

This recreates the graph on every pass through the loop, which is a bad idea: each iteration adds another copy of the ops, so the graph keeps growing and using more memory (this isn't always fatal, but it can happen).
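A quick way to see that (a toy illustration, not part of the original code): every pass through a loop that calls graph-building functions adds new nodes to the default graph.

import tensorflow as tf

g = tf.Graph()
with g.as_default():
    for i in range(3):
        tf.constant(0.0, name='dummy')      # adds one new op on every pass
        print(len(g.get_operations()))      # prints 1, 2, 3 -- the graph keeps growing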

reuse=True is meant for a situation like the example below, where the same graph-building function is called more than once while defining the graph.

# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set of variables is created in the second call.
result2 = my_image_filter(image2)
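my_image_filter is not defined in the answer; a minimal sketch of what such a builder typically looks like (adapted from the old TensorFlow variable-sharing tutorial; image1 and image2 are assumed to be placeholders):

def my_image_filter(input_images):
    with tf.variable_scope("conv1"):
        # get_variable creates 'conv1/weights' the first time; when the enclosing
        # scope is in reuse mode it returns the existing variable instead.
        weights = tf.get_variable("weights", [5, 5, 32, 32],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        return tf.nn.conv2d(input_images, weights, strides=[1, 1, 1, 1], padding='SAME')

with tf.variable_scope("image_filters") as scope:
    result1 = my_image_filter(image1)   # creates image_filters/conv1/weights
    scope.reuse_variables()             # switch the scope to reuse mode
    result2 = my_image_filter(image2)   # shares the same weights as result1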

TensorFlow doesn't know on its own whether you want to "reuse" the variables, i.e. whether the two calls should share the same parameters or not.

In your specific case, by looping you are recreating the parameters each time while telling TensorFlow to simply reuse the variables.
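Concretely, this is where the two ValueError messages come from (a small sketch of the rules under the old tf.get_variable semantics):

with tf.variable_scope("demo"):                  # reuse is off by default
    v = tf.get_variable("v", [1])                # creates demo/v
    # Calling tf.get_variable("v", [1]) again inside this scope raises a
    # ValueError saying the variable already exists and suggesting reuse=True.

with tf.variable_scope("demo", reuse=True):
    v_again = tf.get_variable("v", [1])          # OK: returns the existing demo/v
    # Asking here for a name that was never created, e.g. tf.get_variable("w", [1]),
    # raises a ValueError saying the variable does not exist, because there is
    # nothing to reuse yet -- which is what happens in the question's loop.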

It would be better to move the for loop to after the graph has been built; then you can get rid of reuse=True everywhere.
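A rough sketch of that restructuring (not a drop-in replacement; inference() and build_loss() are hypothetical helpers that would hold the conv1/conv2/.../softmax_linear scopes and the cross-entropy code from the question):

def train():
  with tf.Graph().as_default():
    x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000, 1, 1))
    y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))

    # Build the graph exactly once -- nothing is defined twice,
    # so no reuse=True is needed anywhere.
    logits = inference(x_tensor)            # hypothetical helper with the conv/fc scopes
    loss = build_loss(logits, y_tensor)     # hypothetical helper with the loss collection
    train_op = tf.train.GradientDescentOptimizer(INITIAL_LEARNING_RATE).minimize(loss)

    init = tf.initialize_all_variables()
    with tf.Session() as sess:
      sess.run(init)
      # Only the fed data changes per batch; the graph itself stays fixed.
      for x, y in iterate_batches(data, labels, batch_size, 1):
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict={x_tensor: x, y_tensor: y})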