我在寻找一种从 PDF 文件中提取文本的方法,并发现了 pdfminer 库,GitHub 上有一个可用的示例。我对它稍作修改以便传入参数,用 python 直接执行该脚本时一切正常。但我希望得到一个 exe 文件,于是使用 pyinstaller 生成了 exe;不幸的是,运行该 exe 文件时出现了以下错误:
Traceback (most recent call last):
File "onlypdfminer.py", line 5, in <module>
File "<frozen importlib._bootstrap>", line 971, in _find_and_load
File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "c:\python36-32\lib\site-packages\PyInstaller\loader\pyimod03_importers.py", line 631, in exec_module
exec(bytecode, module.__dict__)
File "site-packages\pdfminer\pdfpage.py", line 10, in <module>
File "<frozen importlib._bootstrap>", line 971, in _find_and_load
File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "c:\python36-32\lib\site-packages\PyInstaller\loader\pyimod03_importers.py", line 631, in exec_module
exec(bytecode, module.__dict__)
File "site-packages\pdfminer\pdfdocument.py", line 12, in <module>
File "<frozen importlib._bootstrap>", line 971, in _find_and_load
File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "c:\python36-32\lib\site-packages\PyInstaller\loader\pyimod03_importers.py", line 631, in exec_module
exec(bytecode, module.__dict__)
...
这是我的代码:
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
import os
import sys
import argparse
# Command-line interface: the input PDF path and the output directory are
# both mandatory options.
# NOTE(review): the description mentions PyPDF2, but nothing visible in this
# file imports it -- confirm whether the help text is still accurate.
parser = argparse.ArgumentParser(
    description='Convert PDF to txt using PyPDF2 and pdfminer',
)
parser.add_argument('-i', '--input', dest='input', required=True)
parser.add_argument('-o', '--output', dest='output', required=True)
# Parsed at module level; argparse exits the process if an option is missing.
args = parser.parse_args()
def checkFile(fname):
    """Abort the program if *fname* does not exist on disk.

    Args:
        fname: Path to check.

    Raises:
        SystemExit: If the path does not exist; the exit message names
            the missing path.
    """
    if not os.path.exists(fname):
        # The original message lacked the space before "doesn't", which
        # glued the path and the explanation together.
        sys.exit("Path {} doesn't exist".format(fname))
def usePdfMiner(fname, oPath, pages=None):
    """Extract the text of a PDF with pdfminer and save it as ``pdf1.txt``.

    Args:
        fname: Path of the PDF file to read. The program exits via
            ``checkFile`` if it does not exist.
        oPath: Directory in which ``pdf1.txt`` is written. It is not
            created here, so it must already exist.
        pages: Optional iterable of page numbers to extract. When it is
            empty or ``None`` an empty set is handed to
            ``PDFPage.get_pages`` (pdfminer presumably treats that as
            "all pages" -- confirm against the pdfminer documentation).

    Returns:
        None. Conversion failures are reported on stderr instead of being
        raised, so the call stays best-effort like the original.
    """
    checkFile(fname)
    pagenums = set(pages) if pages else set()
    try:
        # Context managers guarantee the buffer and both files are closed
        # even when pdfminer raises half-way through the document.
        with StringIO() as output:
            manager = PDFResourceManager()
            converter = TextConverter(manager, output, laparams=LAParams())
            try:
                interpreter = PDFPageInterpreter(manager, converter)
                with open(fname, 'rb') as infile:
                    for page in PDFPage.get_pages(infile, pagenums):
                        interpreter.process_page(page)
            finally:
                converter.close()
            text = output.getvalue()
        # An explicit encoding avoids UnicodeEncodeError on platforms whose
        # default text encoding cannot represent every character in the PDF.
        target = os.path.join(oPath, "pdf1.txt")
        with open(target, "w", encoding="utf-8") as textFile:
            textFile.write(text)
    except Exception as exc:
        # Previously a bare ``except: pass`` hid every failure (including
        # KeyboardInterrupt); report the problem without crashing instead.
        print("Failed to convert {}: {}".format(fname, exc), file=sys.stderr)
# Script entry point: run the conversion only when executed directly,
# using the input path and output directory parsed from the command line.
if __name__ == '__main__':
    usePdfMiner(fname=args.input, oPath=args.output)
如有任何建议,我将不胜感激。