"语法错误的含义:标记的对象是错误的类型(布尔值)"

时间:2018-03-16 05:28:05

标签: python pdf subprocess

这是我正在使用的代码。在处理格式不正确(损坏)的 PDF 时,我会收到标题中的错误;完整的错误信息已在上面列出,请告诉我这个错误是什么意思。我还使用 statinfo(os.stat 的结果)来计算 PDF 的大小。由于这些错误,以及 "Syntax Error: Expected the default config, but wasn't able to find it, or it isn't a Dictionary" 等其他错误,文件大小的汇总无法完成。

from enum import Enum
import os
import subprocess
import sys
import os.path as osp

# Running totals (la is the byte total printed below; lb is only initialised
# here) and a placeholder dict that is rebound further down the script.
la = lb = 0
output = {}
clean_files = os.listdir("/home/hima/Downloads/data/cpdfs")
number1 = len(clean_files)

# Add up st_size (bytes) for every entry returned by os.listdir.
for files in clean_files:
    statinfo_a = os.stat(os.path.join("/home/hima/Downloads/data/cpdfs", files))
    la += statinfo_a.st_size

# Floor division keeps the integer average the script has always printed, and
# the guard avoids ZeroDivisionError when the directory is empty.  The
# single-argument print(...) form runs on both Python 2 and Python 3.
print("clean files avg size in bytes is " + str(la // number1 if number1 else 0))

def pdfinf(infile, cmd='/usr/bin/pdfinfo'):
    """Run ``pdfinfo`` on a file and return selected metadata fields.

    Each output line is split at its first colon; the text before the colon
    must equal one of the known field names exactly.  Lines without a colon,
    and lines whose *value* merely mentions a field name, are ignored.

    Args:
        infile: Path of the PDF file to inspect.
        cmd: Path of the ``pdfinfo`` executable.  Defaults to
            ``/usr/bin/pdfinfo``.

    Returns:
        A dict mapping each recognised field name found in the output (for
        example ``'Pages'`` or ``'File size'``) to its value with surrounding
        whitespace stripped.  Fields that do not appear are not in the dict.

    Raises:
        RuntimeError: If ``cmd`` or ``infile`` does not exist.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    if not osp.exists(cmd):
        raise RuntimeError('System command not found: %s' % cmd)
    if not osp.exists(infile):
        raise RuntimeError('Provided input file not found: %s' % infile)

    labels = ('Title', 'Author', 'Creator', 'Producer', 'CreationDate',
              'ModDate', 'Tagged', 'Pages', 'Encrypted', 'Page size',
              'File size', 'Optimized', 'PDF version')

    # Only stdout is captured; anything the tool writes to stderr goes
    # straight to this process's stderr and is never parsed here.
    cmd_output = subprocess.check_output([cmd, infile])
    if not isinstance(cmd_output, str):
        # Python 3 returns bytes; undecodable metadata bytes are replaced
        # rather than aborting the whole parse.
        cmd_output = cmd_output.decode('utf-8', 'replace')

    output = {}
    for line in cmd_output.splitlines():
        # Split on the first colon only: values such as dates contain colons.
        key, sep, value = line.partition(':')
        key = key.strip()
        if sep and key in labels:
            output[key] = value.strip()
    return output

# Second pass: average the size that pdfinfo itself reports for each file.
la = lb = 0
for files in clean_files:
    path = os.path.join("/home/hima/Downloads/data/cpdfs", files)
    output = pdfinf(path)
    # The field looks like "<number> bytes"; take the leading number instead
    # of slicing off a fixed six-character suffix.  A missing 'File size'
    # still raises KeyError so an unparsed file is not silently skipped.
    value = output['File size'].split()[0]
    la += float(value)

# Guard against an empty directory (number1 == 0) to avoid ZeroDivisionError.
# The single-argument print(...) form runs on both Python 2 and Python 3.
print("clean file size" + str(la / number1 if number1 else 0.0))

0 个答案:

没有答案