用 TensorFlow 加载 LSUN 数据集

时间:2018-11-05 12:58:03

标签: python tensorflow deep-learning

最近,我一直在尝试找到读取 LMDB 格式的 LSUN 数据集的正确方法,但没有找到任何有用的信息。我想知道如何从 LMDB 中读取图像数据,以及使用这种格式有什么好处。谢谢!

1 个答案:

答案 0 :(得分:0)

最后,我使用以下代码从 lmdb 文件中提取 LSUN 图像。

import io
import os
import tempfile

import lmdb
from PIL import Image

def _export_mdb_images(db_path, out_dir=None, flat=True, limit=-1, size=256):
    """Export the images stored in an LMDB database as resized JPEG files.

    Every record is read in cursor order. The record key, decoded as UTF-8,
    becomes the file name; the record value is decoded by Pillow, resized to
    ``size`` x ``size`` pixels and saved as ``<key>.jpg``.

    Args:
        db_path: Path of the LMDB environment to read (opened read-only).
        out_dir: Directory that receives the images. ``None`` falls back to
            the current working directory.
        flat: When true, write every image directly into ``out_dir``.
            Otherwise nest each image under one sub-directory per character
            of the first six characters of its key.
        limit: Stop once this many images have been exported. The running
            count is at least 1 when it is compared, so the default ``-1``
            (like any value below 1) never matches and everything is
            exported.
        size: Edge length, in pixels, of the square output images.
    """
    if out_dir is None:
        out_dir = os.curdir

    env = lmdb.open(
        db_path, map_size=1099511627776,
        max_readers=1000, readonly=True
    )
    try:
        count = 0
        with env.begin(write=False) as txn:
            for key, val in txn.cursor():
                key = str(key, 'utf-8')

                # Decide the output directory for this image.
                if flat:
                    image_out_dir = out_dir
                else:
                    image_out_dir = os.path.join(out_dir, '/'.join(key[:6]))

                # exist_ok avoids the check-then-create race of a separate
                # os.path.exists() test.
                os.makedirs(image_out_dir, exist_ok=True)

                image_out_path = os.path.join(image_out_dir, key + '.jpg')
                # Decode straight from memory: no temporary file is needed,
                # and reopening an open temp file by name fails on Windows.
                with Image.open(io.BytesIO(val)) as image:
                    image.resize((size, size)).save(image_out_path)

                count += 1
                if count == limit:
                    break
                if count % 1000 == 0:
                    print('Finished', count, 'images')
    finally:
        # Release the LMDB environment even if decoding or saving fails.
        env.close()

# Driver: export the images of one LMDB database into a "data" directory.
print("start")
# NOTE(review): this looks like a placeholder -- replace it with the real path
# of the LMDB directory before running.
db_path = "path to lmbd"
# The images are written to a "data" sub-directory located under db_path.
out_dir = os.path.join(db_path, "data")
# Uses the function defaults: flat layout, no limit, 256x256 output.
_export_mdb_images(db_path, out_dir)