我正在尝试调试一个用于训练 CNN 的数据预处理器(它负责加载数据并送入训练)。当数据集超过约 400 幅图像时,它会报 "Too many open files"(打开的文件过多)错误;少于约 400 幅图像时则工作正常。代码如下。这看起来有点像内存泄漏:可能是有太多文件在队列中排队?或者是我有什么东西没有释放(PIL?但它应该会自己关闭文件)。
def buffered_gen_mp(pil_img_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.

    Each item produced by ``pil_img_gen`` is PNG-encoded in the worker
    process, pushed through a bounded ``mp.Queue`` and re-opened as a PIL
    image (backed by an in-memory buffer) on the consumer side.

    pil_img_gen: iterable yielding ``((img_fname, img), (limg_fname, limg))``
        where ``img`` and ``limg`` expose a PIL-style ``save(fp, format)``.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)

    Yields ``((img_fname, img), (limg_fname, limg))`` tuples until the worker
    signals the end of ``pil_img_gen`` with a ``None`` sentinel.
    """
    # The worker holds one already-encoded item while it is blocked in put(),
    # so the queue itself is one slot shorter than the requested buffer.
    # mp.Queue treats maxsize <= 0 as "unbounded"; clamp to 1 so that
    # buffer_size <= 1 still yields a bounded buffer as documented.
    buffer = mp.Queue(maxsize=max(1, buffer_size - 1))

    def _encode_png(pil_img):
        # Serialize a PIL image to PNG bytes so it can cross the process boundary.
        sio = StringIO.StringIO()
        try:
            pil_img.save(sio, 'PNG')
            return sio.getvalue()
        finally:
            sio.close()

    def _buffered_generation_process(pil_img_gen_, buffer):
        for (img_fname, img), (limg_fname, limg) in pil_img_gen_:
            img_enc = _encode_png(img)
            limg_enc = _encode_png(limg)
            buffer.put(((img_fname, img_enc), (limg_fname, limg_enc)), block=True)
        buffer.put(None)  # sentinel: signal the end of the iterator
        buffer.close()

    process = mp.Process(target=_buffered_generation_process, args=(pil_img_gen, buffer))
    process.start()

    for data in iter(buffer.get, None):
        (img_fname, img_enc), (limg_fname, limg_enc) = data
        img = Image.open(StringIO.StringIO(img_enc))
        limg = Image.open(StringIO.StringIO(limg_enc))
        yield ((img_fname, img), (limg_fname, limg))

    # The sentinel has been received, so the worker has nothing left to send;
    # reap it instead of leaving an un-joined child process behind.
    process.join()
def ImageFnameGen(data_dir, img=True, label=True, depth=False, disp=False):
    """
    Yield, for every sub-directory of ``data_dir`` in sorted order, the list
    of file paths selected by the flags.

    data_dir: object exposing ``dirs()``; each returned entry must support
        ``entry / 'name'`` path joining.
    img: include ``<dir>/image.jpg`` when truthy.
    label: include ``<dir>/labels.png`` when truthy.
    depth, disp: accepted but not used by this function.
    """
    for instance_dir in sorted(data_dir.dirs()):
        # (flag, path) pairs in output order: image first, then labels.
        candidates = (
            (img, instance_dir / 'image.jpg'),
            (label, instance_dir / 'labels.png'),
        )
        yield [path for wanted, path in candidates if wanted]
def PilImageGen(img_fname_g):
    """
    Open every file name produced by ``img_fname_g`` as a PIL image.

    img_fname_g: iterable of lists of file names (anything ``str()`` turns
        into a path).

    Yields one list per input list, containing ``(fname, image)`` pairs in
    the same order as the input file names.
    """
    for fnames in img_fname_g:
        out = []
        for fname in fnames:
            img = Image.open(str(fname))
            # Image.open() is lazy: it only identifies the file and keeps the
            # underlying file handle open until the pixel data is read.
            # Callers that collect many images before touching their pixels
            # would otherwise run out of file descriptors ("Too many open
            # files"). load() reads the data now, which lets PIL release the
            # handle for ordinary single-frame images.
            img.load()
            out.append((fname, img))
        yield out
def ScaledImageGen(cfg, data_dir=None, randomize=True, loop=True):
    """
    Load every (image, label image) pair under ``data_dir`` up front, then
    yield them, optionally shuffled and optionally forever.

    cfg: not used by the code visible here.
    data_dir: passed straight to ``ImageFnameGen``, which calls
        ``data_dir.dirs()``; the default ``None`` therefore fails there.
    randomize: shuffle the collected items in place before every pass.
    loop: when truthy, keep cycling over the items; otherwise stop after
        a single pass.

    Yields ``((img_fname, img), (limg_fname, limg))`` tuples.
    """
    img_fname_gen = ImageFnameGen(data_dir)
    pil_img_gen = PilImageGen(img_fname_gen)
    out = []
    for (img_fname, img), (limg_fname, limg) in pil_img_gen:
        # resize img and limg
        out.append(((img_fname, img), (limg_fname, limg)))
    if not out:
        # Nothing was loaded. With loop=True the loop below would spin
        # forever without ever yielding, hanging the consumer.
        return
    while True:
        if randomize:
            random.shuffle(out)
        for item in out:
            yield item
        if not loop:
            break
def GeomJitImageGen(cfg, scaled_img_gen):
    """
    Apply a geometric warp to every (image, label image) pair.

    cfg: not used by the code visible here (presumably consumed by the
        elided setup that defines ``warper_x`` / ``warper_y`` -- confirm).
    scaled_img_gen: iterable of ``((img_fname, img), (limg_fname, limg))``.
        A re-iterable source (e.g. a list) is cycled indefinitely; a
        one-shot iterator is consumed once.

    Yields ``((img_fname, warped_img), (limg_fname, warped_limg))`` where
    both images are rebuilt with ``Image.fromarray`` from uint8 arrays
    clipped to ``[0, 255]``.
    """
    #stuff
    while True:
        produced = False
        for (img_fname, img), (limg_fname, limg) in scaled_img_gen:
            produced = True
            # do some stuff
            img = np.asarray(img).astype(np.double)
            limg = np.asarray(limg).astype(np.double)
            wimg = warper_x(img).clip(0,255).astype(np.uint8)
            wlimg = warper_y(limg).clip(0,255).astype(np.uint8)
            yield (img_fname, Image.fromarray(wimg)), (limg_fname, Image.fromarray(wlimg))
        if not produced:
            # A pass that yielded nothing means the source is empty or an
            # exhausted one-shot iterator; looping again would busy-spin
            # forever without producing anything.
            return
def PhotoJitImageGen(img_gen):
    """
    Apply a randomly chosen photometric algorithm to the image of each pair;
    the label image is passed through untouched.

    img_gen: iterable of ``((img_fname, img), (limg_fname, limg))``.
        A re-iterable source (e.g. a list) is cycled indefinitely; a
        one-shot iterator is consumed once.

    Yields ``((img_fname, jittered_img), (limg_fname, limg))``.
    """
    # define weights, algorithm
    while True:
        produced = False
        for (img_fname, img), (limg_fname, limg) in img_gen:
            produced = True
            # Draw one algorithm per item according to the configured weights.
            alg = np.random.choice(algorithms,p=weights)
            jimg = alg(img)
            yield (img_fname, jimg), (limg_fname, limg)
        if not produced:
            # A pass that yielded nothing means the source is empty or an
            # exhausted one-shot iterator; looping again would busy-spin
            # forever without producing anything.
            return
class Loader(object):
    """
    Builds the image pipeline (scaling -> geometric jitter -> photometric
    jitter -> multiprocess buffer) and pulls samples from it on demand.
    """

    def __init__(self, args, expt):
        # define self.cfg
        # Assemble the pipeline stage by stage, innermost generator first.
        scaled_stage = ScaledImageGen(self.cfg)
        geom_stage = GeomJitImageGen(self.cfg, scaled_stage)
        photo_stage = PhotoJitImageGen(geom_stage)
        self.input_gen = buffered_gen_mp(photo_stage, buffer_size=32*8)
        # stuff

    def __call__(self, x_shared, y_shared):
        """
        Pull one sample from the pipeline per slot of ``x_shared`` and
        convert both PIL images to numpy arrays.

        x_shared, y_shared: equally sized containers; their lengths must match.
        """
        n = len(x_shared)
        assert(n==len(y_shared))
        for ix in xrange(n):
            sample = self.input_gen.next()
            (img_fname, pil_img), (limg_fname, pil_limg) = sample
            img = np.asarray(pil_img)
            limg = np.asarray(pil_limg)
            # stuff