I am just getting started with TensorFlow, and I figured the first step would be to adapt the CIFAR10 model for my own use. My data are not images but signals; the whole dataset has shape [16400, 3000, 1, 1] (dimensions: number of samples, height, width, and a deliberately added channel dimension). I have already solved this problem with the MatConvNet toolbox, so this question is strictly about TensorFlow technique. The dataset is a prepared numpy tensor of the shape above, and in the code below I am trying to prepare the data so that it is readable by the training script:
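(For reference, a stand-in that pins down the shapes and dtypes I am assuming for the two .npy arrays; illustrative only, not the real data:)

import numpy as np

# Hypothetical stand-ins for data.npy / labels.npy; assumption: labels.npy
# holds 1-based class indices, which is why the code subtracts 1 below.
data = np.zeros((16400, 3000, 1, 1), dtype=np.float32)  # samples, height, width, channels
labels = np.ones((16400, 1), dtype=np.int64)            # one class index per sample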
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import numpy as np
IMAGE_SIZE = 3000
data = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = labels-1
labels = labels.astype(int)
data = tf.cast(data,tf.float32)
labels = tf.cast(labels,tf.int64)
NUM_CLASSES = 2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 6400
def _generate_image_and_label_batch(data_sample, label, min_queue_examples,
                                    batch_size, shuffle):
  num_preprocess_threads = 16
  if shuffle:
    data, label_batch = tf.train.shuffle_batch(
        [data_sample, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    data, label_batch = tf.train.batch(
        [data_sample, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + batch_size)
  return data, tf.reshape(label_batch, [batch_size])

def inputs(data, labels, batch_size):
  for i in xrange(0, data.shape[0] / batch_size):
    data_sample = data[i, :, :, :]
    label = labels[i, 0]
    height = 3000
    width = 1
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print('Filling queue with %d data before starting to train' % min_queue_examples)
  return _generate_image_and_label_batch(data_sample, label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
I tried to load the data I have and generate batches the way the CIFAR10 model does, but when I run the trainer code below, the line data, labels = konsensop_input.inputs(data, labels, batch_size) fails with: UnboundLocalError: local variable 'data' referenced before assignment
import os
import time
from datetime import datetime

import numpy as np
import tensorflow as tf
from six.moves import xrange  # pylint: disable=redefined-builtin

import konsensop
import konsensop_input
import konsensop_train

data = konsensop_input.data
labels = konsensop_input.labels

def train():
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    data, labels = konsensop_input.inputs(data, labels, batch_size)
    logits = konsensop_train.inference(data)
    # calculate loss
    loss = konsensop.loss(logits, labels)
    train_op = konsensop.train(loss, global_step)
    # create a saver that saves all variables in the graph
    saver = tf.train.Saver(tf.all_variables())
    # build the summary operation based on the TF collection of summaries
    summary_op = tf.merge_all_summaries()
    # build an initialization operation to run below
    init = tf.initialize_all_variables()
    # start running operations on the graph
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)
    # start the queue runners (what is this and what is it for?)
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (datetime.now(), step, loss_value,
                            examples_per_sec, sec_per_batch))
      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

def main(argv=None):
  train()

if __name__ == '__main__':
  tf.app.run()
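As far as I can tell, the UnboundLocalError itself is plain Python scoping rather than anything TensorFlow-specific: because train() assigns to data and labels, Python treats both names as local to train(), so the right-hand side of the inputs(...) call reads a local that does not exist yet. A minimal reproduction (my own sketch, unrelated to the trainer):

data = 'module-level value'

def broken():
    # Assigning to `data` anywhere in this function makes the name local to
    # the whole function body, so the read on the right-hand side fails.
    data = data + ' (modified)'

broken()  # UnboundLocalError: local variable 'data' referenced before assignment

Renaming the results inside train() (e.g. batched_data, batched_labels) would avoid the clash, but the deeper question is the feeding design itself: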
I would like to figure out how to implement a reasonable data-feeding technique here.
Answer 0 (score: 0)
For the relatively small dataset you want to work with, you could consider loading it into one big numpy array, iterating over it in minibatches, and feeding those minibatches to the computation graph via tf.placeholders and the feed_dict mechanism.
Minibatch iteration could look like this (you should add random shuffling after each epoch):
def iterate_batches(X, y, batch_size, num_epochs):
    N = np.size(X, 0)
    batches_per_epoch = N // batch_size  # integer division: range() needs an int
    for i in range(num_epochs):
        for j in range(batches_per_epoch):
            start, stop = j * batch_size, (j + 1) * batch_size
            yield X[start:stop, :], y[start:stop]
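For the per-epoch shuffling mentioned above, one possible variant (a sketch, assuming X and y are numpy arrays indexable along axis 0):

def iterate_batches_shuffled(X, y, batch_size, num_epochs):
    N = np.size(X, 0)
    batches_per_epoch = N // batch_size
    for i in range(num_epochs):
        perm = np.random.permutation(N)  # fresh random order each epoch
        X_shuf, y_shuf = X[perm], y[perm]
        for j in range(batches_per_epoch):
            start, stop = j * batch_size, (j + 1) * batch_size
            yield X_shuf[start:stop, :], y_shuf[start:stop]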
(If you are not familiar with Python's yield mechanism, search for "Python generators"; there are plenty of good introductions online.)
Given that you have a mechanism for loading the entire dataset into numpy arrays X_train, y_train, you can then write the training loop like this:
train_op = ...
for X, y in iterate_batches(X_train, y_train, your_batch_size, your_num_epochs):
    sess.run([train_op], feed_dict={X_tensor: X, y_tensor: y})
Here, X_tensor and y_tensor are tf.placeholders for the data, which you have to specify in your network architecture.
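For example, with the [N, 3000, 1, 1] signal shape and the two classes from the question, the declarations might look like this (a sketch; build_network is a hypothetical stand-in for whatever inference graph you use, and the loss/optimizer choices are just illustrative):

X_tensor = tf.placeholder(tf.float32, shape=[None, 3000, 1, 1])  # a batch of signals
y_tensor = tf.placeholder(tf.int64, shape=[None])  # one class index per signal

logits = build_network(X_tensor)  # hypothetical: should produce [batch, NUM_CLASSES]
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_tensor))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

Note that if your labels array has shape [N, 1], as in the question, flatten it to [N] (e.g. y_train = y_train.ravel()) before feeding it into y_tensor.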