Python IOError:打开太多文件,同时加载数据和训练CNN

时间:2016-04-02 11:29:02

标签: python multithreading generator convolution ioerror

我正在尝试调试用于训练CNN的数据预处理器。 (它尝试加载数据和训练)。使用>数据集发出太多打开文件错误400幅图像(少于约400幅作品)。这是代码。这有点像发生内存泄漏,可能有太多文件正在排队?或者我没有删除一些东西(PIL?但它自己关闭文件)。

def buffered_gen_mp(pil_img_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """

    buffer = mp.Queue(maxsize=buffer_size-1)

    def _buffered_generation_process(pil_img_gen_, buffer):
        for (img_fname,img),(limg_fname,limg) in pil_img_gen_:

            sio = StringIO.StringIO()
            img.save(sio, 'PNG')
            img_enc = sio.getvalue()
            sio.close()
            sio = StringIO.StringIO()
            limg.save(sio, 'PNG')
            limg_enc = sio.getvalue()
            sio.close()

            buffer.put(((img_fname,img_enc),(limg_fname,limg_enc)), block=True)

        buffer.put(None) # sentinel: signal the end of the iterator
        buffer.close() 

    process = mp.Process(target=_buffered_generation_process, args=(pil_img_gen, buffer))
    process.start()

    for data in iter(buffer.get, None):
        (img_fname,img_enc),(limg_fname,limg_enc) = data
        img = Image.open(StringIO.StringIO(img_enc))
        limg = Image.open(StringIO.StringIO(limg_enc))
        yield ((img_fname,img),(limg_fname,limg))


def ImageFnameGen(data_dir, img=True, label=True, depth=False, disp=False):
    for inst_dir in sorted(data_dir.dirs()):
        out = []
        img_fname = inst_dir/'image.jpg'
        limg_fname = inst_dir/'labels.png'
        if img:
            out.append(img_fname)
        if label:
            out.append(limg_fname)
        yield out

def PilImageGen(img_fname_g):
    for fnames in img_fname_g:
        out = []
        for fname in fnames:
            out.append((fname,Image.open(str(fname))))
        yield out


def ScaledImageGen(cfg, data_dir=None, randomize=True, loop=True):
    img_fname_gen = ImageFnameGen(data_dir)
    pil_img_gen = PilImageGen(img_fname_gen)
    out = []
    for (img_fname,img),(limg_fname,limg) in pil_img_gen:
        # resize img  and limg
        out.append(((img_fname,img),(limg_fname,limg)))
    while True:
        if randomize:
            random.shuffle(out)

        for item in out:
            yield item

        if not loop:
            break


def GeomJitImageGen(cfg, scaled_img_gen):
    #stuff
    while True:
        for (img_fname, img), (limg_fname, limg) in scaled_img_gen:
            # do some stuff
            img = np.asarray(img).astype(np.double)
            limg = np.asarray(limg).astype(np.double)

            wimg = warper_x(img).clip(0,255).astype(np.uint8)
            wlimg = warper_y(limg).clip(0,255).astype(np.uint8)
            yield (img_fname, Image.fromarray(wimg)), (limg_fname, Image.fromarray(wlimg))


def PhotoJitImageGen(img_gen):
    # define weights, algorithm
    while True:
        for (img_fname, img), (limg_fname, limg) in img_gen:
            alg = np.random.choice(algorithms,p=weights)
            jimg = alg(img)
            yield (img_fname, jimg), (limg_fname, limg)


class Loader(object):
    def __init__(self, args, expt):
        # define self.cfg
        input_gen = buffered_gen_mp(PhotoJitImageGen(GeomJitImageGen(self.cfg, ScaledImageGen(self.cfg))), buffer_size=32*8)
        self.input_gen = input_gen
        # stuff

    def __call__(self, x_shared, y_shared):
        assert(len(x_shared)==len(y_shared))
        n = len(x_shared)
        for ix in xrange(n):
            (img_fname, pil_img), (limg_fname, pil_limg) = self.input_gen.next()
            img = np.asarray(pil_img)
            limg = np.asarray(pil_limg)
            # stuff

0 个答案:

没有答案