我在CPU上使用Python 2.7.13和Tensorflow 1.3.0。
我想使用DenseNet(https://github.com/pudae/tensorflow-densenet)来解决一个回归问题。我的数据包含60000张JPEG图像,每张图像有37个浮点数标签。我通过以下方式将数据保存到TFRecords文件中:
def Read_Labels(label_path):
    """Load the label matrix from a CSV file, dropping the first column.

    Args:
      label_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
      A 2-D NumPy array holding every column except the first, one row per
      CSV data row.
    """
    labels_csv = pd.read_csv(label_path)
    # Drop the first column *before* converting to an array, so that its
    # dtype (e.g. a string identifier) cannot force the labels to object dtype.
    return labels_csv.iloc[:, 1:].values
`
def load_image(addr):
    """Read an image file and return it as a 224x224 RGB float32 array.

    Args:
      addr: Path of the image file to read.

    Returns:
      The image resized to (224, 224) with cubic interpolation, converted
      from OpenCV's BGR channel order to RGB and cast to ``np.float32``.

    Raises:
      IOError: If OpenCV could not read the file.
    """
    img = cv2.imread(addr)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of inside cv2.resize.
        raise IOError('Could not read image file: %s' % addr)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img.astype(np.float32)
def Shuffle_images_with_labels(shuffle_data, photo_filenames, labels):
    """Return filenames and labels, optionally shuffled in unison.

    Args:
      shuffle_data: When true, the (filename, label) pairs are shuffled;
        when false, the original order is kept.
      photo_filenames: Sequence of image file paths.
      labels: Sequence of labels; ``labels[i]`` belongs to
        ``photo_filenames[i]``.

    Returns:
      A pair of tuples ``(filenames, labels)`` whose elements stay aligned.
      The inputs are paired with ``zip``, so if the lengths differ the
      longer one is truncated.
    """
    pairs = list(zip(photo_filenames, labels))
    if shuffle_data:
        shuffle(pairs)
    if not pairs:
        # zip(*[]) yields nothing to unpack, so handle empty input directly.
        return (), ()
    # Built on both paths: previously the result was only assigned when
    # shuffle_data was true, so a false flag raised UnboundLocalError.
    addrs, paired_labels = zip(*pairs)
    return addrs, paired_labels
def image_to_tfexample_mine(image_data, image_format, height, width, label):
    """Pack one image and its label into a ``tf.train.Example``.

    Args:
      image_data: Image bytes stored under 'image/encoded'.
      image_format: Format tag stored under 'image/format'.
      height: Image height stored under 'image/height'.
      width: Image width stored under 'image/width'.
      label: Label value(s) stored under 'image/class/label'.

    Returns:
      A ``tf.train.Example`` holding the five features above.
    """
    # bytes_feature, _float_feature and int64_feature are helpers defined
    # outside this block; presumably they wrap values in tf.train.Feature.
    feature_map = {
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': _float_feature(label),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }
    example_features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=example_features)
def _convert_dataset(split_name, filenames, labels, dataset_dir):
    """Write one split of the dataset to sharded TFRecord files.

    Args:
      split_name: Either 'train' or 'validation'.
      filenames: Sequence of image file paths.
      labels: Sequence of labels; ``labels[i]`` belongs to ``filenames[i]``.
      dataset_dir: Directory handed to ``_get_dataset_filename`` to build
        each shard's output path.
    """
    assert split_name in ['train', 'validation']
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
    with tf.Graph().as_default():
        for shard_id in range(_NUM_SHARDS):
            # Use the dataset_dir argument; the name dataset_path that was
            # read here before is not defined in this function.
            output_filename = _get_dataset_filename(
                dataset_dir, split_name, shard_id)
            with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                start_ndx = shard_id * num_per_shard
                end_ndx = min((shard_id + 1) * num_per_shard, num_images)
                for i in range(start_ndx, end_ndx):
                    sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                        i + 1, num_images, shard_id))
                    sys.stdout.flush()
                    img = load_image(filenames[i])
                    # The stored bytes are the raw float32 pixel buffer, not
                    # an encoded image file; readers must use tf.decode_raw.
                    # tobytes() is the current name for the tostring() alias.
                    image_data = tf.compat.as_bytes(img.tobytes())
                    label = labels[i]
                    # NOTE(review): image_format, height and width are not
                    # defined in this function; presumably module-level
                    # values -- confirm they describe the data written here.
                    example = image_to_tfexample_mine(
                        image_data, image_format, height, width, label)
                    # Serialize to string and write to the shard file.
                    tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def run(dataset_dir):
    """Convert the images and labels under ``dataset_dir`` to TFRecords.

    Reads the labels from ``training_labels.csv`` and the image list from
    ``images_training`` inside ``dataset_dir``, shuffles them together, puts
    the first ``_NUM_VALIDATION`` pairs in the validation split and the rest
    in the training split, then writes both splits.

    Args:
      dataset_dir: Directory holding the inputs; also passed on to
        ``_convert_dataset`` as the output location.
    """
    labels = Read_Labels(dataset_dir + '/training_labels.csv')
    # NOTE(review): the pairing below assumes the file list comes back in
    # the same order as the CSV rows -- confirm in _get_filenames_and_classes.
    photo_filenames = _get_filenames_and_classes(dataset_dir + '/images_training')

    shuffle_data = True
    photo_filenames, labels = Shuffle_images_with_labels(
        shuffle_data, photo_filenames, labels)

    training_filenames = photo_filenames[_NUM_VALIDATION:]
    training_labels = labels[_NUM_VALIDATION:]
    validation_filenames = photo_filenames[:_NUM_VALIDATION]
    validation_labels = labels[:_NUM_VALIDATION]

    # Pass this function's own dataset_dir; the name dataset_path that was
    # passed here before is not defined in this function.
    _convert_dataset('train',
                     training_filenames, training_labels, dataset_dir)
    _convert_dataset('validation',
                     validation_filenames, validation_labels, dataset_dir)
    print('\nFinished converting the Flowers dataset!')
我通过以下方式对其进行解码:
with tf.Session() as sess:
    # Parsing spec for one serialized example: image bytes, a format tag
    # and a vector of 37 float labels.
    feature = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature(
            [37,], tf.float32, default_value=tf.zeros([37,], dtype=tf.float32)),
    }
    filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features=feature)

    # The image bytes are reinterpreted as a flat float32 buffer and then
    # reshaped to 224x224x3.
    image = tf.decode_raw(features['image/encoded'], tf.float32)
    print(image.get_shape())
    label = tf.cast(features['image/class/label'], tf.float32)
    image = tf.reshape(image, [224, 224, 3])

    images, labels = tf.train.shuffle_batch(
        [image, label], batch_size=10, capacity=30, num_threads=1,
        min_after_dequeue=10)

    # Local variables are initialized as well as global ones.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for batch_index in range(6):
            img, lbl = sess.run([images, labels])
            img = img.astype(np.uint8)
            print(img.shape)
            # Show the first six images of the batch in a 2x3 grid.
            for j in range(6):
                plt.subplot(2, 3, j + 1)
                plt.imshow(img[j, ...])
            plt.show()
    finally:
        # Always stop and join the queue-runner threads, even if the loop
        # above raised.
        coord.request_stop()
        coord.join(threads)
到目前为止,这一切都运行良好。但是当我使用下面的代码解码TFRecord文件时:
# Reader for the TFRecord files.
# NOTE(review): this binds the class itself, not an instance (the earlier
# snippet calls tf.TFRecordReader()); presumably it is handed to code that
# instantiates it -- confirm against the consumer.
reader = tf.TFRecordReader
# Parsing spec for one serialized example: image bytes, a format tag that
# defaults to 'raw' when absent, and a vector of 37 float labels.
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
'image/class/label': tf.FixedLenFeature(
[37,], tf.float32, default_value=tf.zeros([37,], dtype=tf.float32)),
}
# Handlers that turn the parsed features into the 'image' and 'label' items.
# NOTE(review): the Image handler is given only the 'image/encoded' key. The
# records written by _convert_dataset hold a raw float32 pixel buffer
# (img.tostring()), not an encoded image file, which is consistent with the
# image-decoding failure reported below -- confirm how this handler picks
# its decoder.
items_to_handlers = {
'image': slim.tfexample_decoder.Image('image/encoded'),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
# Decoder combining the parsing spec with the item handlers.
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
我收到以下错误。
INFO:tensorflow:Error reported to Coordinator: assertion failed: [Unable to decode bytes as JPEG, PNG, GIF, or BMP] [[Node: case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert = Assert[T=[DT_STRING], summarize=3, _device="/job:localhost/replica:0/task:0/cpu:0"](case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/is_bmp, case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert/data_0)]]
INFO:tensorflow:Caught OutOfRangeError. Stopping Training.
INFO:tensorflow:Finished training! Saving model to disk.
要使用DenseNet解决我的问题,我必须先修复这个错误。有人能帮我解决这个问题吗?这段代码对flowers、MNIST和CIFAR10等数据集(可在https://github.com/pudae/tensorflow-densenet/tree/master/datasets获得)都能正常工作,但不适用于我的数据。
答案 0 :(得分:1)
# Read the encoded image file as-is; the context manager closes the handle,
# which the one-line FastGFile(...).read() form never did.
with tf.gfile.FastGFile(filenames[i], 'rb') as image_file:
    image_data = image_file.read()
我改用上面这段代码来加载数据,替换掉了下面这两行。现在效果很好。
# Replaced approach: load_image returns the decoded, resized float32 pixel
# array, so the bytes stored here are a raw buffer rather than an encoded
# image file.
img = load_image(filenames[i])
image_data = tf.compat.as_bytes(img.tostring())
答案 1 :(得分:0)
根据错误信息,我认为问题在于:您在创建TFRecords时保存的是已解码的数组数据,却用图像解码器去处理这些数据。您可能已经注意到,不使用slim时,您是用tf.decode_raw来解码数据的。但是在使用slim时,'image/format': tf.FixedLenFeature((), tf.string, default_value='raw')并没有被用到,slim默认会使用图像解码器。
我想您使用的是slim/data中的代码,其中需要指定format_key = 'image/format'。所以,可以像这样:
# Parsing spec for one serialized example.
keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
    # The records in the question carry 37 float labels per image, so the
    # spec must be [37] float32; a [1] int64 spec would not match them.
    'image/class/label': tf.FixedLenFeature(
        [37], tf.float32, default_value=tf.zeros([37], dtype=tf.float32)),
}
# Handlers that turn the parsed features into the 'image' and 'label' items.
# The Image handler is told explicitly which feature holds the format tag.
# NOTE(review): for raw pixel buffers the handler presumably also needs the
# image shape (and a matching dtype) to rebuild the array -- confirm against
# the tfexample_decoder.Image signature of the installed TensorFlow version.
items_to_handlers = {
    'image': tfexample_decoder.Image(
        image_key='image/encoded',
        format_key='image/format'),
    'label': tfexample_decoder.Tensor('image/class/label'),
}
decoder = tfexample_decoder.TFExampleDecoder(
    keys_to_features, items_to_handlers)
但我不确定这能否完全解决您的问题,因为我无法在自己的机器上复现您的情况。
答案 2 :(得分:0)