我一直试图在使用tesseract从图像中提取文本的程序上使用多重处理。但是,当我给图片命名时,它只会在目录中搜索图片名称的第一个字母
def tess(all_clips):
img_text={}
start = timeit.default_timer()
image_dirs=[]
for a in all_clips:
image_dirs.append(Image.open('E:/fin_100_images/'+a))
num=0
for img in image_dirs:
df_temp = pytesseract.image_to_data(img,lang='mar',output_type='data.frame')
df_temp=df_temp.dropna().drop(['level', 'page_num', 'par_num', 'line_num', 'word_num','conf'],axis=1).reset_index().drop('index',1)
img_text[img] = df_temp.T.to_dict()
print('Total images done: '+str(num+1)+'\t Total images remaining: '+str(len(all_clips)-num-1)+' ',end='\r')
num=num+1
stop = timeit.default_timer()
df_temp = pd.DataFrame(list(img_text.items()), columns=['name', 'word_detail'])
print('Time: ', stop - start)
return df_temp
if __name__=='__main__':
all_clips = list(set([f for f in listdir('E:/fin_100_images/') if isfile(join('E:/fin_100_images/', f))]))
processes=[]
for i in all_clips:
process = Process(target=tess, args=(i,))
processes.append(process)
process.start()
for process in processes:
process.join()
print("Multiprocessing complete")
我遇到以下错误
FileNotFoundError: [Errno 2] No such file or directory: 'E:/fin_100_images/l'
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\PRATHAMESH\Desktop\TEST_multi.py", line 28, in tess
image_dirs.append(Image.open('E:/fin_100_images/'+a))
File "C:\ProgramData\Anaconda3\lib\site-packages\PIL\Image.py", line 2770, in open
fp = builtins.open(filename, "rb")