我想对 PDF 文件做一些简单处理。以下代码在我的个人电脑上可以正常运行,但在办公室的电脑上无法运行(系统为 Windows):
import subprocess
def pdf_to_text(pdf_file, page=None, executable='pdftotext'):
    """Extract the text of a PDF by running the external ``pdftotext`` tool.

    The tool is invoked with ``-layout -q`` and ``-`` as the output target,
    so the extracted text is read back from the child's standard output.

    Args:
        pdf_file: Path of the PDF file handed to ``pdftotext``.
        page: Page number to extract. When given, it is passed as both
            ``-f`` and ``-l`` so only that page is converted. ``None``
            (the default) converts the whole document.
        executable: Name or full path of the ``pdftotext`` program. The
            default relies on the program being reachable through PATH;
            pass an absolute path (e.g. to ``pdftotext.exe``) on machines
            where it is not.

    Returns:
        The extracted text as a list of lines, or an empty list when
        ``pdftotext`` exits with a non-zero status.

    Raises:
        OSError: If the ``pdftotext`` program itself cannot be started.
            When it is simply not found, the message says so explicitly
            instead of the generic "file not found" that is easily
            mistaken for a missing PDF.
    """
    # Local import so the function does not depend on the file's import block.
    import errno

    args = [executable]
    if page is not None:
        args += ['-f', str(page), '-l', str(page)]
    args += ['-layout', '-q', pdf_file, '-']

    try:
        txt = subprocess.check_output(args, universal_newlines=True)
    except subprocess.CalledProcessError:
        # pdftotext ran but reported a failure: keep the original
        # best-effort behaviour of returning no lines.
        return []
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise
        # The program could not be found: this is about the executable,
        # not about pdf_file, so make that unambiguous for the caller.
        raise OSError(
            errno.ENOENT,
            "pdftotext executable %r not found; install it and add its "
            "folder to PATH, or pass its full path via 'executable'"
            % (executable,),
            executable,
        )
    return txt.splitlines()
text = pdf_to_text("name_of_my.pdf") # NB: relative path; the script is launched from the folder (presumably the one holding the PDF -- confirm)
运行后的报错信息如下:
Traceback (most recent call last):
File "main_new_old.py", line 119, in <module>
documentText = pdf_to_text(pdf, 0)
File "main_new_old.py", line 23, in pdf_to_text
txt = subprocess.check_output(args, universal_newlines=True)
File "C:\Python27\lib\subprocess.py", line 212, in check_output
process = Popen(stdout=PIPE, *popenargs, **kwargs)
File "C:\Python27\lib\subprocess.py", line 390, in __init__
errread, errwrite)
File "C:\Python27\lib\subprocess.py", line 640, in _execute_child
startupinfo)
WindowsError: [Error 2] Le fichier spécifié est introuvable
错误提示"找不到指定的文件"(这里找不到的很可能是 pdftotext 可执行程序,而不是 PDF 文件本身),该如何解决?