使用 TensorFlow 批量加载测试数据

时间:2017-05-10 02:05:37

标签: image machine-learning tensorflow deep-learning pipeline

以下代码是我从文件中读取图像和标签的管道:

import tensorflow as tf
import numpy as np
import tflearn.data_utils
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import sys

#process labels in the input file
def process_label(label):
    """Build the numeric label vector for one line of the input file.

    NOTE(review): the real body is elided (``...``) in this listing. As shown,
    the function allocates a zero-filled array of length 6 and returns it
    unchanged; how ``label`` is folded into ``info`` is not visible here.
    Presumably ``label`` is the list of string tokens after the three file
    paths (see the call in ``read_label_file``) -- confirm against the full
    source.
    """
    info=np.zeros(6)
    ...
    return info

def read_label_file(file):
    """Parse a comma-separated label file into paths, labels and raw lines.

    Each non-blank line is split on ``","``: the first three tokens are
    collected as one list of file paths, and the remaining tokens are handed
    to ``process_label`` to build that line's label row.

    Args:
        file: Path of the label file to read.

    Returns:
        A tuple ``(filepaths, labels, lines)`` where ``filepaths`` is a list of
        three-element lists, ``labels`` is the ``np.vstack`` of all label rows,
        and ``lines`` holds the raw text of every parsed line (line ending
        included).

    Raises:
        IndexError: if a non-blank line has fewer than three comma-separated
            tokens.
        ValueError: from ``np.vstack`` if the file contains no parsable line.
    """
    filepaths = []
    labels = []
    lines = []

    # The context manager closes the handle even if parsing raises.
    with open(file, "r") as f:
        for line in f:
            # A blank line (typically a trailing newline at end of file)
            # carries no record; skip it instead of failing on tokens[1].
            if not line.strip():
                continue
            tokens = line.split(",")
            filepaths.append([tokens[0],tokens[1],tokens[2]])
            labels.append(process_label(tokens[3:]))
            lines.append(line)
    return filepaths, np.vstack(labels), lines

def _load_stacked_image(paths, params, training):
    """Decode every image named in ``paths`` and stack them along channels.

    Args:
        paths: 1-D string tensor of file names relative to
            ``params.path_prefix`` (one queue element).
        params: Configuration object; reads ``path_prefix``, ``num_channels``,
            ``load_size`` and ``crop_size``.
        training: When true, apply a random crop and a random up/down flip;
            otherwise apply a central crop.

    Returns:
        A float image tensor made of the per-file images concatenated on
        axis 2.
    """
    planes = []
    for i in range(paths.get_shape()[0]):
        file_content = tf.read_file(params.path_prefix+paths[i])
        # Scale decoded pixel values by 1/255 before resizing.
        image = tf.to_float(tf.image.decode_jpeg(file_content, channels=params.num_channels))*(1.0/255)
        image = tf.image.resize_images(image,[params.load_size[0],params.load_size[1]])
        if training:
            image = tf.random_crop(image,size=[params.crop_size[0],params.crop_size[1],params.num_channels])
            image = tf.image.random_flip_up_down(image)
        else:
            # central_crop takes a fraction, derived here from the first
            # dimension of crop_size and load_size only.
            image = tf.image.central_crop(image, (params.crop_size[0]+0.0)/params.load_size[0])
        image = tf.image.per_image_standardization(image)
        planes.append(image)
    return tf.concat(planes, 2)


def get_data_batches(params):
    """Build the queued train/test input pipelines and return batch tensors.

    Reads the label files named by ``params.train_info`` and
    ``params.test_info``, slices them into per-example queues, decodes and
    preprocesses the images, and groups them with ``tf.train.batch``.

    Args:
        params: Configuration object. Reads ``train_info``, ``test_info``,
            ``shuffle``, ``path_prefix``, ``num_channels``, ``load_size``,
            ``crop_size``, ``batch_size``, ``test_size``, ``num_threads`` and
            ``loadSlice``. The optional attribute ``test_num_epochs``
            (default ``None`` = cycle forever, the previous behaviour) limits
            how many times the test set is produced. Set it to ``1`` to read
            every test example exactly once; the last batch is then smaller
            and a further ``sess.run`` raises ``tf.errors.OutOfRangeError``.
            An epoch limit creates a local variable, so the caller must also
            run ``tf.local_variables_initializer()``.

    Returns:
        A tuple of batch tensors selected by ``params.loadSlice``:
        ``'all'`` -> train image/label/line and test image/label/line;
        ``'train'`` / ``'test'`` -> image and label of that split;
        ``'train_info'`` / ``'test_info'`` -> image, label and line of that
        split; any other value -> train image/label and test image/label.
    """
    # reading labels and file path
    train_filepaths, train_labels, train_line = read_label_file(params.train_info)
    test_filepaths, test_labels, test_line = read_label_file(params.test_info)

    # convert string into tensors
    train_images = ops.convert_to_tensor(train_filepaths)
    train_labels = ops.convert_to_tensor(train_labels)
    train_line = ops.convert_to_tensor(train_line)
    test_images = ops.convert_to_tensor(test_filepaths)
    test_labels = ops.convert_to_tensor(test_labels)
    test_line = ops.convert_to_tensor(test_line)

    # create input queues
    train_input_queue = tf.train.slice_input_producer([train_images, train_labels, train_line], shuffle=params.shuffle)
    # Without an epoch limit the test queue never closes, so
    # allow_smaller_final_batch below could never produce a short batch.
    test_num_epochs = getattr(params, 'test_num_epochs', None)
    test_input_queue = tf.train.slice_input_producer([test_images, test_labels, test_line],num_epochs=test_num_epochs,shuffle=False)

    # process path and string tensor into an image and a label
    train_image = _load_stacked_image(train_input_queue[0], params, True)
    train_label = train_input_queue[1]
    train_lineInfo  = train_input_queue[2]

    test_image = _load_stacked_image(test_input_queue[0], params, False)
    test_label = test_input_queue[1]
    test_lineInfo  = test_input_queue[2]

    # define tensor shape (three images per example are stacked on channels)
    train_image.set_shape([params.crop_size[0], params.crop_size[1], params.num_channels*3])
    train_label.set_shape([66])
    test_image.set_shape( [params.crop_size[0], params.crop_size[1], params.num_channels*3])
    test_label.set_shape([66])

    # collect batches of images before processing
    train_image_batch, train_label_batch, train_lineno = tf.train.batch([train_image, train_label, train_lineInfo],batch_size=params.batch_size,num_threads=params.num_threads,allow_smaller_final_batch=True)
    test_image_batch, test_label_batch, test_lineno   = tf.train.batch([test_image, test_label, test_lineInfo],batch_size=params.test_size,num_threads=params.num_threads,allow_smaller_final_batch=True)

    if(params.loadSlice=='all'):
      return train_image_batch, train_label_batch, train_lineno, test_image_batch, test_label_batch, test_lineno
    elif params.loadSlice=='train':
      return train_image_batch, train_label_batch
    elif params.loadSlice=='test':
      return test_image_batch, test_label_batch
    elif params.loadSlice=='train_info':
      return train_image_batch, train_label_batch, train_lineno
    elif params.loadSlice=='test_info':
      return test_image_batch, test_label_batch, test_lineno
    else:
      return train_image_batch, train_label_batch, test_image_batch, test_label_batch

我想使用相同的管道来加载测试数据。我的测试数据的大小很大,我无法一次加载所有这些数据。 我有20453个测试示例,它不是批量大小的整数倍(这里是512)。

如何通过此管道只读一次我的所有测试示例,然后测量它们的性能?

目前,我正在使用以下代码批量处理测试数据,但它不起作用:即使我将 allow_smaller_final_batch 设置为 True,它也始终从队列中读取完整批次。

# Evaluation loop: restore the checkpoint, then drain the test queue batch by
# batch while counting how many examples were seen.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Input producers built with num_epochs keep their epoch counter in a
    # local variable, so local variables must be initialised as well.
    sess.run(tf.local_variables_initializer())
    saver.restore(sess,"checkpoints2/snapshot-16")

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    num_examples=0
    try:
        while not coord.should_stop():
            img_test, lbl_test, lbl_line=sess.run([test_image_batch,test_label_batch,test_lineno])
            print(lbl_test.shape)
            size=lbl_test.shape[0]
            num_examples += size
            # A short batch means the queue has been drained.
            if size<args.batch_size:
                break
    except tf.errors.OutOfRangeError:
        # Raised once an epoch-limited queue is exhausted; this is the normal
        # end-of-data signal, not a failure.
        pass
    finally:
        # Stop and join the queue-runner threads so they do not outlive the
        # session. Leaving the ``with`` block closes the session itself.
        coord.request_stop()
        coord.join(threads)

这是我的模型的代码:

from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.normalization import batch_normalization
from tflearn.layers.estimator import regression
from tflearn.activations import relu

def get_alexnet(x,num_output):
    """Assemble an AlexNet-style network with three fully connected heads.

    Five convolution stages (each followed by batch normalisation and ReLU,
    with max pooling after the first, second and fifth) feed two
    4096-unit fully connected stages with dropout. Three separate fully
    connected layers are then attached to the shared features.

    Args:
        x: Input tensor fed to the first convolution.
        num_output: Number of units in the first output head.

    Returns:
        A tuple ``(network1, network2, network3)`` of output layers with
        ``num_output``, 12 and 6 units respectively.
    """
    def bn_relu(layer, eps=0.001):
        # Batch-normalise, then apply ReLU.
        return relu(batch_normalization(layer, epsilon=eps))

    def pool(layer):
        # 3x3 max pooling with stride 2.
        return max_pool_2d(layer, 3, strides=2)

    # Convolutional trunk.
    features = pool(bn_relu(conv_2d(x, 64, 11, strides=4)))
    features = pool(bn_relu(conv_2d(features, 192, 5)))
    # This stage uses a smaller epsilon than the others, as in the original.
    features = bn_relu(conv_2d(features, 384, 3), eps=0.0001)
    features = bn_relu(conv_2d(features, 256, 3))
    features = pool(bn_relu(conv_2d(features, 256, 3)))

    # Two identical dense stages: FC(4096) -> BN -> ReLU -> dropout(0.5).
    for _ in range(2):
        features = dropout(bn_relu(fully_connected(features, 4096)), 0.5)

    # Three output heads sharing the same features.
    head_main = fully_connected(features, num_output)
    head_twelve = fully_connected(features, 12)
    head_six = fully_connected(features, 6)
    return head_main, head_twelve, head_six

2 个答案:

答案 0 :(得分:0)

这可以通过设置num_epochs = 1和allow_smaller_final_batch = True来实现!

答案 1 :(得分:0)

一种解决方案是将 batch_size 设置为测试集的大小。