我正在尝试读取docx文件,而不是将其拆分为重要部分并将它们插入到mysql数据库中。问题是我的docx文件是葡萄牙语,因此它有很多特殊字符(例如á,ã,â),而且我已经努力解决这个问题已有几天了。因为当我添加到数据库时,特殊字符变为'?'。
这是我的代码:
db = MySQLdb.connect(host = "localhost",
user="filipefr",
passwd="da66ro",
db="quiz_db",
charset='utf8',
use_unicode=True)
cursor = db.cursor()
document = docx.Document('teste.docx')
docText = '\n\n'.join([paragraph.text.encode('utf-8') for paragraph in document.paragraphs])
d2 = docText.decode("utf-8")
N = len(d2.encode(sys.stdout.encoding, errors='replace').split("\n"))
query = ''
questao = {}
for i in range(0,N):
s = d2.encode(sys.stdout.encoding, errors='replace').split("\n")[i]
try:
isinstance(int(s[0:2]), int)
questao[num_questoes] = d2.encode(sys.stdout.encoding, errors='replace').split("\n")[i][3:len(s)]
query = 'INSERT INTO multichoice_question (category_id, content) VALUES ("4", "' + d2.encode(sys.stdout.encoding, errors='replace').split("\n")[i][3:len(s)] + '");'
try:
cursor.execute(query)
db.commit()
except MySQLdb.Error, e:
print "error %s" %e
db.rollback()
except:
pass