我使用此脚本生成必须采用特定格式的自定义XML文件。它查询数据库并将结果转换为一个大的xml文件。我这样做是为了从库存零件清单到员工记录的多个数据库。
import csv
import StringIO
import time
import MySQLdb
import lxml.etree
import lxml.builder
from datetime import datetime
import string
from lxml import etree
from lxml.builder import E as buildE
from datetime import datetime
from time import sleep
import shutil
import glob
import os
import logging
def logWrite(message):
logging.basicConfig(
filename="C:\\logs\\XMLSyncOut.log",
level=logging.DEBUG,
format='%(asctime)s %(message)s',
datefmt='%m/%d/%Y %I:%M:%S: %p'
)
logging.debug(message)
def buildTag(tag,parent=None,content=None):
element = buildE(tag)
if content is not None:
element.text = unicode(content)
if parent is not None:
parent.append(element)
return element
def fetchXML(cursor):
logWrite("constructing XML from cursor")
fields = [x[0] for x in cursor.description]
doc = buildTag('DATA')
for record in cursor.fetchall():
r = buildTag('ROW',parent=doc)
for (k,v) in zip(fields,record):
buildTag(k,content=v,parent=r)
return doc
def updateDatabase 1():
try:
conn = MySQLdb.connect(host = 'host',user = 'user',passwd = 'passwd',db = 'database')
cursor = conn.cursor()
except:
sys.exit(1)
logWrite("Cannot connect to database - quitting!")
cursor.execute("SELECT * FROM database.table")
logWrite("Dumping fields from database.table into cursor")
xmlFile = open("results.xml","w")
doc = fetchXML(cursor)
xmlFile.write(etree.tostring(doc,pretty_print=True))
logWrite("Writing XML results.xml")
xmlFile.close()
由于某种原因,我从excel电子表格导入的一个新数据库出现了某些类型的编码错误,而其他数据库没有。这是错误
element.text = unicode(content)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x96 in position 21: ordinal not in range(128)
我尝试通过将buildTag函数更改为显式编码为ascii:
def buildTag(tag,parent=None,content=None):
element = buildE(tag)
if content is not None:
content = str(content).encode('ascii','ignore')
element.text = content
if parent is not None:
parent.append(element)
return element
这仍然无效。
关于我能做些什么来阻止这种想法?我无法逃避它们,因为我不能将“\ x92”作为输出显示在记录中。
答案 0 :(得分:0)
我认为你在windows编码中遇到的问题可以在shell中尝试:
In: print '\x92'.decode('cp1251')
Out: '
答案 1 :(得分:0)
我专注于
element.text = unicode(content)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x96 in position 21: ordinal not in range(128)
我假设content
的类型为str
,即它包含字节代码(仅适用于Python 2)。您必须知道已使用哪种编码来生成此字节代码。然后,为了从这个字节代码创建一个unicode对象,你必须明确告诉Python如何解码它,例如:
element.text = content.decode("utf-8")