TFRecords解析:无法解析序列化的示例

时间:2019-11-21 15:22:48

标签: python tensorflow

写入记录的函数

def getRecordData(fileName, outFile):
    """Build the TFRecord feature dict for one image and its target values.

    Args:
        fileName: Path of the encoded image file; it is read as raw bytes.
        outFile: Path of the file from which the ``y`` values are populated
            (the population code is elided in this snippet).

    Returns:
        A dict with an ``'image'`` bytes feature and an ``'output'``
        float-list feature.
    """
    with tf.io.gfile.GFile(fileName, 'rb') as fid:
        encoded_jpg = fid.read()

    y = []
    with open(outFile) as outFile:
        # ...populate y....
        pass  # keeps the elided block syntactically valid
    return {
        # BytesList.value is a repeated field, so the encoded image must be
        # wrapped in a list; passing a BytesIO object would not store the
        # image as one single bytes value.
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_jpg])),
        'output': tf.train.Feature(float_list=tf.train.FloatList(value=y))
    }

解析tfrecords

 def parseExample(example):
    """Parse one serialized Example into a decoded image and its 'output' value.

    Args:
        example: A serialized ``tf.train.Example``.

    Returns:
        A tuple ``(image, output)``: the image decoded with three channels
        and the parsed ``"output"`` feature.
    """
    features = {
        "image": tf.io.FixedLenFeature([], tf.string),
        # NOTE(review): an empty shape declares exactly one float per example,
        # while getRecordData stores a list `y` under 'output' — a list with
        # more than one value cannot be parsed with this spec.
        "output": tf.io.FixedLenFeature([], tf.float32)
    }
    parsed = tf.io.parse_single_example(example, features=features)
    # NOTE(review): the writer names its bytes `encoded_jpg`; confirm that
    # decode_png is the intended decoder for these images.
    image = tf.image.decode_png(parsed["image"], channels=3)
    return image, parsed["output"]



 def make_dataset(dir, dtype, dataSetType, parse_fn):
  dataset = tf.data.TFRecordDataset(...path...)
  dataset = dataset.shuffle(buffer_size=1000)
  dataset = dataset.map(parseExample)
  dataset = dataset.batch(batch_size=32)
  dataset.cache('E:\\trainingcache')
  return dataset

当我尝试验证图像是否正确加载时

# Iterate over the dataset and display each image to check that it loads.
# NOTE(review): parseExample returns a tuple (image, output), so indexing an
# element with ['image'] looks inconsistent with it — confirm.
dataset = make_dataset(args.records_dir, 'training', 'tables', parseExample)
    for image_features in dataset:

        image_raw = image_features['image'].numpy()
        display.display(display.Image(data=image_raw))

我得到:

  

example_parsing_ops.cc:240 : Invalid argument: Key: output.  Can't parse serialized Example.

1 个答案:

答案 0 :(得分:1)

我能够使用以下代码重现该错误-

%tensorflow_version 2.x
import tensorflow as tf
print(tf.__version__)

def write_date_tfrecord():
    """Write a single Example with ten float values to ``Output.tf_record``.

    The values are stored as a float list under the ``'Output'`` key.
    """
    # writes 10 dummy values to replicate the issue
    Output = [20191221 + x for x in range(0, 10)]
    print("Writing Output - ", Output)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Output': tf.train.Feature(float_list=tf.train.FloatList(value=Output)),
            }
        )
    )

    # The context manager closes the writer, so the record is flushed to disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Output.tf_record") as writer:
        writer.write(example.SerializeToString())

def parse_function(serialized_example):
    """Parse one serialized Example and return its ``'Output'`` feature.

    The ``'Output'`` spec is a scalar ``FixedLenFeature``; this is the spec
    that reproduces the "Can't parse serialized Example" error on the
    ten-value record.
    """
    feature_spec = {'Output': tf.io.FixedLenFeature([], tf.float32)}
    parsed = tf.io.parse_single_example(
        serialized=serialized_example, features=feature_spec
    )
    return parsed['Output']

def dataset_generator():
    """Return a dataset over ``Output.tf_record`` with every record parsed."""
    raw_records = tf.data.TFRecordDataset("Output.tf_record")
    return raw_records.map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

if __name__ == '__main__':
    # Write the sample record, then read it back and print each parsed element.
    write_date_tfrecord()
    for Output in dataset_generator():
        print(Output)

输出-

2.2.0
Writing Output -  [20191221, 20191222, 20191223, 20191224, 20191225, 20191226, 20191227, 20191228, 20191229, 20191230]
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/context.py in execution_mode(mode)
   1985       ctx.executor = executor_new
-> 1986       yield
   1987     finally:

10 frames
InvalidArgumentError: Key: Output.  Can't parse serialized Example.
     [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]] [Op:IteratorGetNext]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/executor.py in wait(self)
     65   def wait(self):
     66     """Waits for ops dispatched in this executor to finish."""
---> 67     pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
     68 
     69   def clear_error(self):

InvalidArgumentError: Key: Output.  Can't parse serialized Example.
     [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]

解决方案——修改 parse_function 中的代码后,我解决了此问题并成功运行了代码。您可以在自己的 parseExample 函数中做同样的修改。原因是:写入时 FloatList 中包含多个值,而 FixedLenFeature([], tf.float32) 只能解析单个标量。请将下面的第一行替换为第二行:

'Output': tf.io.FixedLenFeature([], tf.float32)      

'Output': tf.io.FixedLenSequenceFeature([], tf.float32,allow_missing=True) 

修正后的代码-

%tensorflow_version 2.x
import tensorflow as tf
print(tf.__version__)

def write_date_tfrecord():
    """Write a single Example with ten float values to ``Output.tf_record``.

    The values are stored as a float list under the ``'Output'`` key.
    """
    # writes 10 dummy values to replicate the issue
    Output = [20191221 + x for x in range(0, 10)]
    print("Writing Output - ", Output)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Output': tf.train.Feature(float_list=tf.train.FloatList(value=Output)),
            }
        )
    )

    # The context manager closes the writer, so the record is flushed to disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Output.tf_record") as writer:
        writer.write(example.SerializeToString())

def parse_function(serialized_example):
    """Parse one serialized Example and return its ``'Output'`` feature.

    ``'Output'`` is declared as a ``FixedLenSequenceFeature`` with
    ``allow_missing=True``, so a float list of any length is accepted.
    """
    feature_spec = {
        'Output': tf.io.FixedLenSequenceFeature(
            [], tf.float32, allow_missing=True
        ),
    }
    parsed = tf.io.parse_single_example(
        serialized=serialized_example, features=feature_spec
    )
    return parsed['Output']

def dataset_generator():
    """Return a dataset over ``Output.tf_record`` with every record parsed."""
    raw_records = tf.data.TFRecordDataset("Output.tf_record")
    return raw_records.map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

if __name__ == '__main__':
    # Write the sample record, then read it back and print each parsed element.
    write_date_tfrecord()
    for Output in dataset_generator():
        print(Output)

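输出(注意:float32 只有 24 位有效位,无法精确表示所有大于 2^24 = 16777216 的整数,因此打印出的值与写入的值略有出入;如需精确保存这类整数,请改用 Int64List / tf.int64)-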

2.2.0
Writing Output -  [20191221, 20191222, 20191223, 20191224, 20191225, 20191226, 20191227, 20191228, 20191229, 20191230]
tf.Tensor(
[20191220. 20191222. 20191224. 20191224. 20191224. 20191226. 20191228.
 20191228. 20191228. 20191230.], shape=(10,), dtype=float32)

希望这能回答您的问题。学习愉快。