I am just getting started with TensorFlow, and I figured the first step would be to adapt the CIFAR10 model for my own use. My data are not images but signals; the whole dataset has shape [16400, 3000, 1, 1] (dimensions: number of samples, height, width, and a deliberately added channel dimension). I have already solved this problem with the MatConvNet toolbox, so this question is strictly about TensorFlow technique. The dataset is a prepared numpy tensor of the shape above, and in the code below I am trying to prepare the data so that it is readable by the training script:
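(For reference, a stand-in that pins down the shapes and dtypes I am assuming for the two .npy arrays; illustrative only, not the real data:)

import numpy as np

# Hypothetical stand-ins for data.npy / labels.npy; assumption: labels.npy
# holds 1-based class indices, which is why the code subtracts 1 below.
data = np.zeros((16400, 3000, 1, 1), dtype=np.float32)  # samples, height, width, channels
labels = np.ones((16400, 1), dtype=np.int64)            # one class index per sample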
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import numpy as np
IMAGE_SIZE = 3000
data = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = labels-1
labels = labels.astype(int)
data = tf.cast(data,tf.float32)
labels = tf.cast(labels,tf.int64)
NUM_CLASSES = 2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 6400
def _generate_image_and_label_batch(data_sample, label, min_queue_examples,
                                    batch_size, shuffle):
  num_preprocess_threads = 16
  if shuffle:
    data, label_batch = tf.train.shuffle_batch(
        [data_sample, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    data, label_batch = tf.train.batch(
        [data_sample, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + batch_size)
  return data, tf.reshape(label_batch, [batch_size])

def inputs(data, labels, batch_size):
  for i in xrange(0, data.shape[0] / batch_size):
    data_sample = data[i, :, :, :]
    label = labels[i, 0]
    height = 3000
    width = 1
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print('Filling queue with %d data before starting to train' % min_queue_examples)
  return _generate_image_and_label_batch(data_sample, label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
I tried to load the data I have and generate batches the way the CIFAR10 model does, but when I run the trainer code below, the line data, labels = konsensop_input.inputs(data, labels, batch_size) fails with: UnboundLocalError: local variable 'data' referenced before assignment
import os
import time
from datetime import datetime

import numpy as np
import tensorflow as tf
from six.moves import xrange  # pylint: disable=redefined-builtin

import konsensop
import konsensop_input
import konsensop_train

data = konsensop_input.data
labels = konsensop_input.labels

def train():
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    data, labels = konsensop_input.inputs(data, labels, batch_size)
    logits = konsensop_train.inference(data)
    # calculate loss
    loss = konsensop.loss(logits, labels)
    train_op = konsensop.train(loss, global_step)
    # create a saver that saves all variables in the graph
    saver = tf.train.Saver(tf.all_variables())
    # build the summary operation based on the TF collection of summaries
    summary_op = tf.merge_all_summaries()
    # build an initialization operation to run below
    init = tf.initialize_all_variables()
    # start running operations on the graph
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)
    # start the queue runners (what is this and what is it for?)
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (datetime.now(), step, loss_value,
                            examples_per_sec, sec_per_batch))
      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

def main(argv=None):
  train()

if __name__ == '__main__':
  tf.app.run()
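As far as I can tell, the UnboundLocalError itself is plain Python scoping rather than anything TensorFlow-specific: because train() assigns to data and labels, Python treats both names as local to train(), so the right-hand side of the inputs(...) call reads a local that does not exist yet. A minimal reproduction (my own sketch, unrelated to the trainer):

data = 'module-level value'

def broken():
    # Assigning to `data` anywhere in this function makes the name local to
    # the whole function body, so the read on the right-hand side fails.
    data = data + ' (modified)'

broken()  # UnboundLocalError: local variable 'data' referenced before assignment

Renaming the results inside train() (e.g. batched_data, batched_labels) would avoid the clash, but the deeper question is the feeding design itself: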
I would like to figure out how to implement a reasonable data-feeding technique here.
Answer 0 (score: 0)
For the relatively small dataset you want to work with, you could consider loading it into one big numpy array, iterating over it in minibatches, and feeding those minibatches to the computation graph via tf.placeholders and the feed_dict mechanism.
Minibatch iteration could look like this (you should add random shuffling after each epoch):
def iterate_batches(X, y, batch_size, num_epochs):
    N = np.size(X, 0)
    batches_per_epoch = N // batch_size  # integer division: range() needs an int
    for i in range(num_epochs):
        for j in range(batches_per_epoch):
            start, stop = j * batch_size, (j + 1) * batch_size
            yield X[start:stop, :], y[start:stop]
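For the per-epoch shuffling mentioned above, one possible variant (a sketch, assuming X and y are numpy arrays indexable along axis 0):

def iterate_batches_shuffled(X, y, batch_size, num_epochs):
    N = np.size(X, 0)
    batches_per_epoch = N // batch_size
    for i in range(num_epochs):
        perm = np.random.permutation(N)  # fresh random order each epoch
        X_shuf, y_shuf = X[perm], y[perm]
        for j in range(batches_per_epoch):
            start, stop = j * batch_size, (j + 1) * batch_size
            yield X_shuf[start:stop, :], y_shuf[start:stop]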
(If you are not familiar with Python's yield mechanism, search for "Python generators"; there are plenty of good introductions online.)
Given that you have a mechanism for loading the entire dataset into numpy arrays X_train, y_train, you can then write the training loop like this:
train_op = ...
for X, y in iterate_batches(X_train, y_train, your_batch_size, your_num_epochs):
    sess.run([train_op], feed_dict={X_tensor: X, y_tensor: y})
Here, X_tensor and y_tensor are tf.placeholders for the data, which you have to specify in your network architecture.
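For example, with the [N, 3000, 1, 1] signal shape and the two classes from the question, the declarations might look like this (a sketch; build_network is a hypothetical stand-in for whatever inference graph you use, and the loss/optimizer choices are just illustrative):

X_tensor = tf.placeholder(tf.float32, shape=[None, 3000, 1, 1])  # a batch of signals
y_tensor = tf.placeholder(tf.int64, shape=[None])  # one class index per signal

logits = build_network(X_tensor)  # hypothetical: should produce [batch, NUM_CLASSES]
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_tensor))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

Note that if your labels array has shape [N, 1], as in the question, flatten it to [N] (e.g. y_train = y_train.ravel()) before feeding it into y_tensor.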