我使用以下代码创建了 images.tfrecords 文件:
from PIL import Image
import numpy as np
import tensorflow as tf
import glob
# Collect the input JPEG paths. The raw string keeps the Windows backslash
# literal (a plain '\P' is an invalid escape sequence), and sorting makes the
# order in which records are written deterministic across runs.
images = sorted(glob.glob(r'E:\Projects/FYPT/vehicle/bus/*.jpg'))
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``.

    Args:
        value: The bytes payload to store.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains ``value`` as its
        only element.
    """
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
    """Wrap a single integer value in a ``tf.train.Feature``.

    Args:
        value: The integer to store.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains ``value`` as its
        only element.
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
# Destination TFRecord file (raw string so the backslash stays literal).
tfrecords_filename = r'E:\Projects/FYPT/vehicle/images.tfrecords'

# Decoded images are kept in memory so the round trip can be checked later.
original_images = []

# The context manager closes the writer even if loading an image fails.
with tf.python_io.TFRecordWriter(tfrecords_filename) as writer:
    for img_path in images:
        img = np.array(Image.open(img_path))
        height, width = img.shape[:2]

        original_images.append(img)

        # Store the raw pixel buffer (NOT JPEG-encoded data).
        # tobytes() replaces the deprecated ndarray.tostring().
        img_raw = img.tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(height),
            'width': _int64_feature(width),
            'image_raw': _bytes_feature(img_raw),
        }))
        writer.write(example.SerializeToString())
然后,我尝试打印 serialized_example,以检查 tf.TFRecordReader() 的输出:
import tensorflow as tf
import skimage.io as io
# Reader whose read() call below is fed from a filename queue.
reader = tf.TFRecordReader()
tfrecords_filename = 'E:\Projects/FYPT/vehicle/images.tfrecords'
# Queue of input file names, configured with num_epochs=10.
filename_queue = tf.train.string_input_producer([tfrecords_filename],num_epochs=10)
# read() yields a pair; the first element is discarded and only the
# serialized example is kept.
_,serialized_example = reader.read(filename_queue)
sess= tf.Session()
# NOTE(review): nothing here starts queue runners or runs a variable
# initializer before this sess.run(); presumably that is why no output
# appears -- confirm against the TF 1.x queue-runner documentation.
print(sess.run(serialized_example))
但是它只给出了以下警告,并没有打印出 serialized_example 的任何内容(这是命令行的屏幕截图)。
我做错了什么,应该如何打印“ serialized_example”的输出
答案 0 :(得分:1)
您之所以收到该警告,是因为使用了 tf.train.string_input_producer():它返回一个队列,而基于 QueueRunner API 的输入管道已被弃用,将来的版本不再支持。
基于队列的解决方案-不推荐!
serialized_example 只是一个字符串对象(即通过 tf.python_io.TFRecordWriter 写入 images.tfrecords 文件的每个示例的序列化结果)。
您需要解析每个示例以获取其中的特征(features)。就您的情况而言:
# Parse one serialized tf.train.Example into the features stored by the writer.
features = tf.parse_single_example(
    serialized_example,
    features={
        "image_raw": tf.FixedLenFeature([], tf.string),
        "height": tf.FixedLenFeature([], tf.int64),
        "width": tf.FixedLenFeature([], tf.int64),
    })
img_height = features["height"]
img_width = features["width"]
# 'image_raw' holds the raw pixel buffer of the NumPy array, not JPEG-encoded
# data, so it is decoded byte-wise instead of with tf.image.decode_jpeg.
# Assumes 8-bit pixels, as PIL produces for JPEG input -- TODO confirm.
img_flat = tf.decode_raw(features["image_raw"], tf.uint8)
# Restore (height, width, channels); the channel count is inferred.
img_raw = tf.reshape(
    img_flat,
    tf.stack([img_height, img_width, tf.constant(-1, dtype=tf.int64)]))
# Initialize global and local variables before reading from the input queue.
init_op = tf.group(tf.local_variables_initializer(),
                   tf.global_variables_initializer())

# The `with` block closes the session on exit, so no explicit close is needed.
with tf.Session() as sess:
    sess.run(init_op)

    # Start the threads that feed the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        while not coord.should_stop():
            img_raw_value, img_height_value = sess.run([img_raw, img_height])
            print(img_raw_value.shape)
            print(img_height_value)
    except tf.errors.OutOfRangeError:
        # Raised once the filename queue has run out of epochs.
        print("End of data")
    finally:
        coord.request_stop()
        # Wait for all threads to terminate.
        coord.join(threads)
数据集API-强烈建议!
有关如何构建输入管道的详细说明,请参阅 TensorFlow API 文档。
根据您的情况,您应该像这样定义 _parse_function:
def _parse_function(example_proto):
    """Parse one serialized ``tf.train.Example`` into image, height and width.

    Args:
        example_proto: Scalar string tensor holding a serialized example with
            the keys ``image_raw``, ``height`` and ``width``.

    Returns:
        A tuple ``(img_raw, height, width)`` where ``img_raw`` is the image
        tensor reshaped to ``(height, width, channels)`` and ``height`` and
        ``width`` are int64 scalar tensors.
    """
    # The keys must match the ones used when the records were written.
    features = {"image_raw": tf.FixedLenFeature([], tf.string),
                "height": tf.FixedLenFeature([], tf.int64),
                "width": tf.FixedLenFeature([], tf.int64)}
    parsed_features = tf.parse_single_example(example_proto, features)
    height = parsed_features["height"]
    width = parsed_features["width"]
    # 'image_raw' holds a raw pixel buffer, not JPEG-encoded data, so it is
    # decoded byte-wise instead of with tf.image.decode_jpeg.
    # Assumes 8-bit pixels, as PIL produces for JPEG input -- TODO confirm.
    img_flat = tf.decode_raw(parsed_features["image_raw"], tf.uint8)
    # Restore (height, width, channels); the channel count is inferred.
    img_raw = tf.reshape(
        img_flat,
        tf.stack([height, width, tf.constant(-1, dtype=tf.int64)]))
    return img_raw, height, width
然后创建一个数据集,它会从 TFRecord 文件中读取所有示例,并提取其中的特征:
# Build a dataset over the TFRecord file and parse every record.
dataset = tf.data.TFRecordDataset([tfrecords_filename])
dataset = dataset.map(_parse_function)
# here you could batch and shuffle

# A one-shot iterator walks the dataset exactly once.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    while True:
        try:
            val = sess.run(next_element)
            print("img_raw:", val[0].shape)
            print("height:", val[1])
            print("width:", val[2])
        except tf.errors.OutOfRangeError:
            # Raised once every record has been consumed.
            print("End of dataset")
            break
我希望这会有所帮助。