我只想在Excel中保存数据,我发现这个代码并且它运行良好:
import xlsxwriter
# Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('zzzzz.xlsx')
worksheet = workbook.add_worksheet()

# Sample invoice record that is stored in the first row of the sheet.
date = "17/08/2015"
# A leading-zero integer literal such as 001 is an octal literal in Python 2
# and a syntax error in Python 3; the plain literal 1 is the same value.
bill = 1
item = "something"
customer = "Luis"
price = 100

# Start from the first cell. Rows and columns are zero indexed.
row = 0
col = 0

# Write the record across consecutive columns of the same row.
for offset, value in enumerate((date, bill, item, customer, price)):
    worksheet.write(row, col + offset, value)

# Closing the workbook is the step that assembles and stores the .xlsx file.
workbook.close()
我遇到的问题是当我尝试使用空格保存信息时,如果:
customer = "JOSE LUIS FEBRERO LOPEZ"
错误:
>>>
Traceback (most recent call last):
File "C:\Python27\crearexcel.py", line 28, in <module>
workbook.close()
File "C:\Python27\lib\site-packages\xlsxwriter\workbook.py", line 296, in close
self._store_workbook()
File "C:\Python27\lib\site-packages\xlsxwriter\workbook.py", line 520, in _store_workbook
xml_files = packager._create_package()
File "C:\Python27\lib\site-packages\xlsxwriter\packager.py", line 140, in _create_package
self._write_shared_strings_file()
File "C:\Python27\lib\site-packages\xlsxwriter\packager.py", line 280, in _write_shared_strings_file
sst._assemble_xml_file()
File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 53, in _assemble_xml_file
self._write_sst_strings()
File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 83, in _write_sst_strings
self._write_si(string)
File "C:\Python27\lib\site-packages\xlsxwriter\sharedstrings.py", line 110, in _write_si
self._xml_si_element(string, attributes)
File "C:\Python27\lib\site-packages\xlsxwriter\xmlwriter.py", line 122, in _xml_si_element
self.fh.write("""<si><t%s>%s</t></si>""" % (attr, string))
File "C:\Python27\lib\codecs.py", line 694, in write
return self.writer.write(data)
File "C:\Python27\lib\codecs.py", line 357, in write
data, consumed = self.encode(object, self.errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xa0 in position 11: ordinal not in range(128)
我尝试用 str(customer) 将其转换为字符串,但这并不起作用。
我发现的唯一解决方案是把空格 " " 替换为下划线 "_"。
知道如何在Excel中用空格保存数据吗?
这是我的所有代码,我从文件夹中获取所有文件,稍后我转换为文本,在提取一些数据后,我尝试创建一个excel文件。
我尝试编码utf-8但不工作
# -*- coding: cp1252 -*-
# -*- coding: UTF-8 -*-
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from cStringIO import StringIO
import os
import xlsxwriter
# Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('zzzzz.xlsx')
worksheet = workbook.add_worksheet()

# Every regular file in the current working directory is a candidate input.
files = [f for f in os.listdir('.') if os.path.isfile(f)]

# z is the index of the next entry of ``files`` to process and e is the
# exclusive upper bound for that index. They used to be assigned inside a
# ``for f in files`` loop, which left both names undefined (NameError later
# on) whenever the directory contained no files.
z = 0
e = len(files) - 1
def convert_pdf_to_txt(path):
    """Extract the text of every page of a PDF file with pdfminer.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        The text of all pages as one byte string, encoded with the codec
        handed to ``TextConverter`` ('utf-8').
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=LAParams())
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # ``open`` inside ``with`` replaces the Python-2-only ``file`` builtin
        # and closes the handle even if a page fails to parse.
        with open(path, 'rb') as fp:
            pages = PDFPage.get_pages(
                fp,
                set(),
                maxpages=0,
                password="",
                caching=True,
                check_extractable=True,
            )
            for page in pages:
                interpreter.process_page(page)
        # retstr accumulates the output of every processed page, so it is read
        # exactly once, after the last page; reading and appending it per page
        # would repeat the earlier pages in the result.
        return retstr.getvalue()
    finally:
        device.close()
        retstr.close()
# UTF-8 bytes of two consecutive non-breaking spaces (U+00A0 U+00A0).
# NOTE(review): the previous code removed the literal "Â Â " from the bill
# number, which looks like this byte pair displayed as cp1252 -- confirm.
NBSP_PAIR = '\xc2\xa0\xc2\xa0'


def _line_at(lines, position):
    """Return lines[position], or '' when position is outside the list."""
    return lines[position] if 0 <= position < len(lines) else ''


row = 0
col = 0

# NOTE(review): e is len(files) - 1, so the last entry of ``files`` is never
# processed; the bound is kept as it was -- confirm that this is intended.
for factura in files[z:e]:
    # Convert each PDF once and reuse the text for both lookups below.
    text = convert_pdf_to_txt(factura)

    # ejemplo 1: scan the non-empty lines for labels; the value of interest
    # sits a fixed number of lines after its label.
    lines = [line for line in text.split('\n') if line]
    custData = {'Name': '', 'paymentType': ''}
    for i, line in enumerate(lines):
        if 'EMAIL:' in line:
            custData['Name'] = _line_at(lines, i + 1)
        elif 'FACTURA' in line:
            custData['BillNumber'] = _line_at(lines, i + 1)
        elif 'Vencimientos:' in line:
            custData['price'] = _line_at(lines, i + 2)
        elif 'Banco:' in line:
            custData['paymentType'] = _line_at(lines, i + 1)

    # ejemplo 2: same idea over all lines (empty ones included), remembering
    # the index of each value; -1 means the label was not found.
    txtList = text.splitlines()
    billNumIdx = priceIdx = expirDateIdx = -1
    for idx, line in enumerate(txtList):
        if line == "FACTURA":
            billNumIdx = idx + 1
        if "Vencimientos:" in line:
            priceIdx = idx + 2
            expirDateIdx = idx + 1

    billNum = _line_at(txtList, billNumIdx).replace(NBSP_PAIR, '')
    price = _line_at(txtList, priceIdx)
    expirDate = _line_at(txtList, expirDateIdx)

    # The name and payment type are no longer stripped of "Â": with the cp1252
    # source declaration that literal is presumably the single byte 0xC2, and
    # removing it from UTF-8 text leaves a bare 0xA0 byte -- the byte named in
    # the UnicodeDecodeError raised by workbook.close().
    nombre = custData['Name']
    formadepago = custData['paymentType']

    # Debug output; these are still UTF-8 byte strings.
    print(expirDate)
    print(billNum)
    print(nombre)
    print(formadepago)
    print(price)

    columna2 = ", ".join((billNum, nombre, formadepago))

    # xlsxwriter needs unicode objects for non-ASCII text, so the UTF-8 bytes
    # are decoded right at the point where they are handed to the worksheet.
    worksheet.write(row, col, expirDate.decode('utf-8'))
    worksheet.write(row, col + 1, columna2.decode('utf-8'))
    worksheet.write(row, col + 2, price.decode('utf-8'))
    row += 1

workbook.close()
您好,我在所有 import 语句之后添加了下面这段代码……现在我能够保存不间断空格了。
# Workaround reported by the asker (Python 2 only): switch the interpreter's
# default string encoding from ASCII to cp1252.
import sys
# sys.setdefaultencoding is removed from the sys module at interpreter
# start-up; reloading the module makes the function available again.
reload(sys)
# From here on, implicit str <-> unicode conversions anywhere in the process
# use cp1252 instead of ASCII.
sys.setdefaultencoding('Cp1252')
答案 0 :(得分:1)
sys.setdefaultencoding('Cp1252')
——这是一种很糟糕的修复方式,纯属取巧的 hack。它掩盖了其他问题,并使您的代码变得脆弱且依赖于特定平台。
"JOSE LUIS FEBRERO LOPEZ"
包含以Windows-1252编码的不间断空格(0xA0)。
xlsxwriter要求您在使用非ASCII字符时传递Unicode对象。事实上,将Unicode对象用于所有字符串是一种很好的做法。
在字符串字面量的前面加上前缀 u 即可创建Unicode对象:
u"JOSE LUIS FEBRERO LOPEZ"
由于您的Python源代码编码为Windows-1252,您需要将以下内容添加到源文件的顶部:
# coding=cp1252
这将告诉Python如何将源文件中的字符串解码为Unicode对象。
答案 1 :(得分:0)
您可以尝试在脚本的第一行加入
# coding=utf-8
将脚本的源码编码设置为utf-8。然后在字符串前面添加一个小写字母u,使它成为Unicode字符串(其内容按utf-8从源码中解码):
customer = u"JOSE LUIS FEBRERO LOPEZ"
然后甚至可以在文本中使用非ascii字母,如Ñ。
或 - 如前所述 - 在文本中使用普通空格而不是不间断空格(0xa0)。