这是我正在使用的代码。在处理格式正常(非畸形)的PDF时,我收到了上述错误;我已在上面完整列出了这些错误,请告诉我问题出在哪里。我还使用 os.stat 返回的 statinfo 来计算PDF的大小。由于这些错误,以及 "Syntax Error: Expected the default config, but wasn't able to find it, or it isn't a Dictionary"
等其他错误,文件大小的汇总无法完成。
from enum import Enum
import os
import subprocess
import sys
import os.path as osp
# Running byte totals; ``lb`` is initialised here but not used in this section.
la = lb = 0
output = {}

# Directory whose entries are measured; every entry returned by listdir is
# stat'ed.
clean_dir = "/home/hima/Downloads/data/cpdfs"
clean_files = os.listdir(clean_dir)
number1 = len(clean_files)

# Sum the on-disk size of every entry, as reported by os.stat().
for name in clean_files:
    files = os.path.join(clean_dir, name)
    statinfo_a = os.stat(files)
    la += statinfo_a.st_size

# Guard against an empty directory, which would otherwise raise
# ZeroDivisionError. ``//`` keeps the integer average that ``/`` produced for
# two ints under Python 2.
if number1:
    print("clean files avg size in bytes is " + str(la // number1))
else:
    print("no files found in " + clean_dir)
def pdfinf(infile):
    """Run ``pdfinfo`` on *infile* and return selected metadata fields.

    Args:
        infile: Path of the PDF file to inspect.

    Returns:
        A dict mapping each recognised label (for example ``'Pages'`` or
        ``'File size'``) to the text that follows the first colon on that
        output line, with surrounding whitespace removed. Labels that do not
        appear in the command output are absent from the dict.

    Raises:
        RuntimeError: If ``/usr/bin/pdfinfo`` or *infile* does not exist.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    cmd = '/usr/bin/pdfinfo'
    if not osp.exists(cmd):
        raise RuntimeError('System command not found: %s' % cmd)
    if not osp.exists(infile):
        raise RuntimeError('Provided input file not found: %s' % infile)

    labels = ('Title', 'Author', 'Creator', 'Producer', 'CreationDate',
              'ModDate', 'Tagged', 'Pages', 'Encrypted', 'Page size',
              'File size', 'Optimized', 'PDF version')

    cmd_output = subprocess.check_output([cmd, infile])
    # check_output returns bytes on Python 3; decode so the string operations
    # below work. On Python 2 the result is already ``str`` and is left as is.
    if not isinstance(cmd_output, str):
        cmd_output = cmd_output.decode('utf-8', 'replace')

    output = {}
    for line in cmd_output.splitlines():
        # Split on the first colon only: values such as dates contain colons.
        key, sep, value = line.partition(':')
        if not sep:
            # No "key: value" shape on this line; nothing to extract.
            continue
        # Compare the whole key rather than searching the line for the label,
        # so a label word occurring inside another field's value (e.g. a
        # title containing "Pages") cannot overwrite the real entry.
        key = key.strip()
        if key in labels:
            output[key] = value.strip()
    return output
# Reset the running totals before the pdfinfo-based pass.
la = lb = 0
# Number of files whose size was successfully read from pdfinfo.
measured = 0

for files in clean_files:
    path = os.path.join("/home/hima/Downloads/data/cpdfs", files)
    try:
        output = pdfinf(path)
    except subprocess.CalledProcessError as err:
        # pdfinfo rejected this file; report it and keep going so one bad
        # PDF does not abort the whole summary.
        sys.stderr.write("skipping %s: pdfinfo exited with status %s\n"
                         % (path, err.returncode))
        continue

    value = output.get('File size')
    if value is None:
        sys.stderr.write("skipping %s: no 'File size' in pdfinfo output\n"
                         % path)
        continue

    # The value is expected to look like "12345 bytes" (the original code
    # dropped the trailing 6 characters); take the leading token instead of
    # relying on the exact suffix length.
    try:
        size = float(value.split()[0])
    except (IndexError, ValueError):
        sys.stderr.write("skipping %s: unparsable file size %r\n"
                         % (path, value))
        continue

    la += size
    measured += 1

# Average over the files that were actually measured; when nothing was
# skipped this equals the total number of listed files.
if measured:
    print("clean file size" + str(la / measured))
else:
    print("clean file size: no files could be measured")