我使用this script下载并将cifar10数据转换为tfrecord文件...它完成没有问题,我有一个合适的二进制文件。
我然后尝试使用此脚本导入我的文件:
"""Provides data for the Cifar10 dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/datasets/download_and_convert_cifar10.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
import dataset_utils
slim = tf.contrib.slim
_FILE_PATTERN = 'cifar10_%s.tfrecord'
SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}
_NUM_CLASSES = 10
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A [32 x 32 x 3] color image.',
'label': 'A single integer between 0 and 9',
}
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading cifar10.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if not reader:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded':
tf.FixedLenFeature((), tf.string, default_value=''),
'image/format':
tf.FixedLenFeature((), tf.string, default_value='png'),
'image/class/label':
tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)
labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)
data = get_split('test', '/path/to/cifar10_dir')
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=False))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
data_provider = slim.dataset_data_provider.DatasetDataProvider(
data, num_readers=10, shuffle=True)
img, lbl = data_provider.get(['image', 'label'])
它也没有给我任何错误,但是当我尝试时:
sess.run(img)
这个过程永远不会停止。 tensorflow正在启动会话但是没有读取文件,它不会释放提示并且基本上什么都不做。我真的很生气,因为我不知道从哪里开始。有人有任何建议吗?
答案 0 :(得分:1)
您应该在queue runners
之后开始DatasetDataProvider
,这也是一个队列。因此,请将其更改为以下顺序:
data_provider = slim.dataset_data_provider.DatasetDataProvider(
data, num_readers=10, shuffle=True)
img, lbl = data_provider.get(['image', 'label'])
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)