import sys
# When I add this line, I see nothing in the Python shell, so I cannot tell when the script crashes.
import sys
import os
from glob import glob
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from cStringIO import StringIO
import re
import xlsxwriter
import time
def find_ext(dr, ext):
    """Return the paths of the entries in directory *dr* that end in ``.ext``.

    Args:
        dr: Directory to search (not searched recursively).
        ext: File extension without the leading dot, e.g. ``"pdf"``.

    Returns:
        A list of paths, each prefixed with *dr*, matching ``*.<ext>``.
        The list is empty when nothing matches.
    """
    # os.path.join is used because the file imports ``os``; a bare ``path``
    # name is never imported and would raise NameError.
    return glob(os.path.join(dr, "*.{}".format(ext)))
# PDF files found in the current working directory.
# Built as a real, sorted list so that it can be measured with len() and
# indexed on every Python version (filter() is a lazy iterator on Python 3)
# and so that the processing order does not depend on os.listdir() ordering.
# The extension check is case-insensitive, so ".Pdf" is accepted as well.
files = sorted(
    f for f in os.listdir('.')
    if os.path.isfile(f) and f.lower().endswith('.pdf')
)
def convert_pdf_to_txt(path):
    """Extract the text of the PDF file at *path* using pdfminer.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        Everything the pdfminer ``TextConverter`` wrote to its output buffer
        while the pages were processed, as one string.

    Raises:
        IOError/OSError: if *path* cannot be opened.
        Any exception pdfminer raises while parsing is propagated unchanged.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    password = ""
    maxpages = 0      # 0: pdfminer applies no page limit
    caching = True
    pagenos = set()   # empty set: pdfminer applies no page filter

    try:
        with open(path, 'rb') as fp:
            for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages,
                                          password=password, caching=caching,
                                          check_extractable=True):
                # Rendering the page is what makes the converter write the
                # page's text into ``retstr``.
                interpreter.process_page(page)
    finally:
        device.close()

    # Read the buffer once, after all pages: it already holds the text of
    # every processed page, so reading it per page would duplicate content.
    text = retstr.getvalue()
    retstr.close()
    return text
# Today's date as day/month/year, with the "/" separators swapped for "-".
fecha_de_hoy = "-".join(time.strftime("%d/%m/%Y").split("/"))
# Open the output workbook and give it a single worksheet.
workbook = xlsxwriter.Workbook('Expenses.xlsx')
worksheet = workbook.add_worksheet()

# Cell coordinates are zero-based; writing starts at the top-left cell.
row = 0
col = 0

# Column titles, written left to right along the starting row.
for _offset, _title in enumerate((
    "FECHA",
    "CLIENTE",
    "PROVEEDOR",
    "REF. CLIENTE",
    "REMITENTE",
    "DESTINATARIO",
    "DIRECCION DEST.",
    "CODIGO POSTAL DEST.",
    "POBLACION DEST.",
    "PROVINCIA DEST.",
    "Nº BULTOS",
    "PESO",
    "COSTE",
    "PVP",
    "E-mail CONFIRMACIÓN",
)):
    worksheet.write(row, col + _offset, _title)
def _line_at(lines, idx):
    """Return ``lines[idx]``, or '' when *idx* is -1 (marker not found) or past the end."""
    if 0 <= idx < len(lines):
        return lines[idx]
    return ''


def _extract_shipment_fields(txtList):
    """Pick the shipment fields out of the text lines of one invoice.

    Each field is located relative to a marker line: the lines following
    "Destino MercancÃa" hold the recipient data, the line after
    "Nº de Pedido" holds the order reference, and the package count sits two
    lines after a line containing "Bultos". When a marker occurs more than
    once, the last occurrence wins.

    Args:
        txtList: The invoice text, already split into lines.

    Returns:
        A 7-tuple of strings: (recipient name, address, postal code, town,
        province, order reference, package count). A field is '' when its
        marker is missing or the expected line does not exist.
    """
    destinatarioIdx = direcionNumIdx = codigoNumIdx = poblacionIdx = -1
    provinciaIdx = pedidoIdx = bultosIdx = -1
    for idx, line in enumerate(txtList):
        # NOTE(review): this literal looks like a mis-decoded "Mercancía";
        # confirm it matches the text pdfminer actually returns.
        if line == "Destino MercancÃa":
            destinatarioIdx = idx + 1
            direcionNumIdx = idx + 2
            # Postal code and town are read from the same line.
            codigoNumIdx = idx + 3
            poblacionIdx = idx + 3
            provinciaIdx = idx + 4
        if line == "Nº de Pedido":
            pedidoIdx = idx + 1
        if "Bultos" in line:
            bultosIdx = idx + 2
    # TODO: presumably an address spanning two lines pushes the postal code,
    # town and province one line further down; that case is not handled.

    nombre = _line_at(txtList, destinatarioIdx)
    # NOTE(review): as written this replaces "É" with itself; presumably it
    # was meant to repair a mis-decoded character - confirm the pattern.
    nombre = re.sub("É", "É", nombre)
    direccion = _line_at(txtList, direcionNumIdx)
    # Keep only the digits of the shared line for the postal code...
    codigo = re.sub(r"\D", "", _line_at(txtList, codigoNumIdx))
    # ...and drop the digits and all whitespace from it for the town.
    poblacion = re.sub("[0-9]", "", _line_at(txtList, poblacionIdx))
    poblacion = re.sub(r"\s+", "", poblacion, flags=re.UNICODE)
    provincia = _line_at(txtList, provinciaIdx)
    pedido = _line_at(txtList, pedidoIdx)
    bultos = re.sub(r"\s+", "", _line_at(txtList, bultosIdx), flags=re.UNICODE)
    return nombre, direccion, codigo, poblacion, provincia, pedido, bultos


# Materialise the file collection so it can be counted and walked in order
# whether ``files`` is a list or a lazy iterable.
facturas = list(files)
lengthlist = len(facturas)
print(lengthlist)

for w, factura in enumerate(facturas):
    print(w)
    print(factura)

    # Convert each PDF once and work on its lines.
    txtList = convert_pdf_to_txt(factura).splitlines()
    (nombre_destinatario, direccion_destinatario, codigo_destinatario,
     poblacion_destinatario, provincia_destinatario, pedido_destinatario,
     bultos_destinatario) = _extract_shipment_fields(txtList)

    # Echo what was extracted so progress is visible on the console.
    for etiqueta, valor in (
        ("Nombre Destinatario", nombre_destinatario),
        ("Direccion destinatario", direccion_destinatario),
        ("codigo destinatario", codigo_destinatario),
        ("poblacion destinatario", poblacion_destinatario),
        ("Provincia destinatario", provincia_destinatario),
        ("Nº pedido destinatario", pedido_destinatario),
        ("Nº bultos envío", bultos_destinatario),
    ):
        print(etiqueta)
        print(valor)

    # Row 0 holds the column titles, so invoice number w goes to row w + 1.
    row = w + 1
    fila = (
        fecha_de_hoy,
        "SIDAC",
        "PROVEEDOR",
        pedido_destinatario,
        "SIDAC",
        nombre_destinatario,
        direccion_destinatario,
        codigo_destinatario,
        poblacion_destinatario,
        provincia_destinatario,
        bultos_destinatario,
        "PESO",
        "COSTE",
        "PVP",
        "trafico@buendialogistica.com",
    )
    for offset, valor in enumerate(fila):
        worksheet.write(row, col + offset, valor)

# XlsxWriter only writes the output file when the workbook is closed.
workbook.close()