我有这样的字典:
for i in wordlist:
    # some searching and parsing that produces one-line sentences, str1 and str2
    list1.append(str1)
    list2.append(str2)
    # NOTE(review): the original indentation was lost in extraction. The two
    # statements below use the loop variable `i`, so they presumably belong to
    # the loop body -- confirm against the original post.
    zipall = zip(list1, list2)
    mydict = {i: zipall}
其中 'i' 是一个字符串。所有内容都是西里尔文。当我打印它时,得到的是代码点(\u0440\u0435 等)。
我需要在每次迭代中逐行将字典保存到csv文件中,以便i,str1和str2在同一行和单独的列中,稍后由用户读取。当我尝试
# Asker's first attempt (Python 2): keys and values are handed to csv.writer
# unchanged. This is the code reported to raise the UnicodeEncodeError quoted
# right after it, so it is intentionally left unfixed here.
with open('C:\...result.csv','wb') as f: #I will need the 'a' mode?
    writer = csv.writer(f)
    for key, value in mydict.items():
        writer.writerow([key, value])
以及类似的方法时,我得到:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-4: ordinal not in range(128)
我尝试过的其他内容:
# Asker's second attempt: a DictWriter whose field names are the sorted
# dictionary keys, i.e. one CSV column per key.
f = open('C:\...results.csv','wb')
w = csv.DictWriter(f,sorted(mydict.keys()))
w.writeheader() #throws error in this line
# Tries to UTF-8 encode every value and write them all as a single row.
w.writerow({k:v.encode('utf8') for k,v in mydict.items()})
f.close()
(来自this question)和pickle,基于this question。我一直在尝试iterate over the dictionary,但值是一个元组,我无法对其进行编码。有些答案涉及functions而不是(尝试使用元组),但我不理解这些方法(并且它们也不起作用)。
有(简单)方法吗?
编辑 - ROUNDABOUT解决方案
由于我并不真的需要输出为csv,稍后将在Excel中检查数据,因此我应用了the xlwt package。从here获得了这个想法。
该软件包使我能够使用指定的编码写入Excel电子表格的单元格(请参阅this)。我不再需要字典或元组列表了。我只使用结果字符串。
如果有办法从Python将xls转换为csv,我就不知道了。
答案 0 :(得分:0)
答案就在Python 2.7文档中。
请参阅:13.1.5. Examples
您可以定义UnicodeWriter,见下文:
import cStringIO
import codecs
import csv
class UnicodeWriter(object):
    """
    A CSV writer which writes rows to the output stream "ostream",
    encoded in the given encoding.

    This is Python 2 code: each row is first written as UTF-8 into an
    in-memory ``cStringIO`` buffer by the standard ``csv.writer``, then
    decoded and re-encoded into the target encoding before being written
    to the real stream.
    """
    # pylint: disable=too-few-public-methods

    def __init__(self, ostream, dialect=csv.excel, encoding="utf-8", **kwargs):
        """
        Initialize the writer with the output stream, the CSV dialect and the encoding.

        :param ostream: Output stream receiving the encoded CSV data.
        :type ostream: file like object.
        :param dialect: CSV dialect, Excel by default.
        :type dialect: Dialect
        :param encoding: Encoding to use.
        :type encoding: str
        :param kwargs: Extra keyword arguments forwarded to :func:`csv.writer`.
        """
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
        self.stream = ostream
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """
        Write a row to the output stream (CSV file).

        :param row: List of UNICODE strings to write; every item must
            provide an ``encode`` method.
        :type row: list of unicode
        """
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and re-encode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue so the next row starts from a clean buffer
        self.queue.truncate(0)

    def writerows(self, rows):
        """
        Write a list of rows. See: :meth:`writerow`.

        :param rows: List of rows.
        :type rows: list.
        """
        for row in rows:
            self.writerow(row)
以下是异常处理的完整实现:
import csv
import sys
def to_unicode(obj):
    """
    Convert an object to a UNICODE string (Python 2).

    :param obj: Any object, or None.
    :return: ``u""`` for None, the object itself if it is already unicode,
        the decoded text for a byte string, otherwise ``unicode(obj)``.
    :rtype: unicode
    """
    if obj is None:
        return u""
    elif isinstance(obj, unicode):
        return obj
    elif isinstance(obj, str):
        try:
            return unicode(obj, sys.getdefaultencoding())
        except UnicodeDecodeError:
            # Bytes that cannot be decoded with the default encoding:
            # fall back to their repr() instead of failing.
            return unicode(repr(obj))
    else:
        return unicode(obj)
class CsvWriteException(ValueError):
    """
    Exception raised when a CSV file can't be written.

    The CSV path, the offending row and the underlying cause are stored,
    in that order, in ``self.args``.
    """

    def __init__(self, csv_path, invalid_row, cause):
        """
        Initialize the exception.

        :param csv_path: Full path of the CSV file to write.
        :type csv_path: str
        :param invalid_row: Row to write but containing invalid values.
        :type invalid_row: list[unicode]
        :param cause: Exception cause of the problem.
        :type cause: Exception
        """
        super(CsvWriteException, self).__init__(csv_path, invalid_row, cause)

    def get_csv_path(self):
        """
        :return: Full path of the CSV file to write (unicode).
        """
        return self.args[0]

    def get_invalid_row(self):
        """
        :return: Row to write but containing invalid values (list of unicodes).
        """
        return self.args[1]

    def get_cause(self):
        """
        :return: Exception cause of the problem (Exception).
        """
        return self.args[2]

    def __str__(self):
        """
        :return: The repr() of the unicode message built by :meth:`__unicode__`.
        """
        return repr(self.__unicode__())

    def __unicode__(self):
        """
        :return: The formatted error message (unicode): quoted path,
            repr of the invalid row, and the cause.
        """
        msg_fmt = (u"Échec d'écriture du fichier {csv_path}, enregistrement invalide\u00a0: {invalid_row}. "
                   u"-- Cause: {cause}")
        csv_path = self.quote(self.get_csv_path())
        invalid_row = repr(self.get_invalid_row())
        cause = self.get_cause()
        err_msg = msg_fmt.format(csv_path=csv_path,
                                 invalid_row=invalid_row,
                                 cause=cause)
        return err_msg

    @staticmethod
    def quote(text):
        """
        Quote a text using the format '"{0}"', or the string "None" if the text is None.

        Double quotes inside the text are escaped with a backslash.

        :param text: String to quote.
        :type text: str or unicode.
        :return: The quoted text or "None".
        """
        if text is None:
            return "None"
        else:
            if isinstance(text, str):
                # Byte string: undecodable bytes are replaced rather than raising.
                escaped = unicode(text.replace('"', '\\"'), errors='replace')
            else:
                escaped = text.replace('"', '\\"')
            return u'"{0}"'.format(escaped)
def write_csv_file(csv_path, record_list, dialect=csv.excel, encoding="utf-8"):
    """
    Write the records to a CSV file on disk.

    The header row is the list of keys of the first record; every record is
    then written in that same column order. A field missing from a record
    is written as an empty cell. If there are no records, the file is
    created (or truncated) and left empty, without a header.

    See: :meth:`csv.list_dialects`: for a list of all registered dialects.

    :param csv_path: Full path of the CSV file to write.
    :type csv_path: str or unicode
    :param record_list: Records to write: list of dictionaries of the type (field_name, field_value).
        An empty list (or None) produces an empty file.
    :type record_list: list[dict]
    :param dialect: The optional 'dialect' parameter can be given which is used to define a set of parameters
        specific to a particular CSV dialect. For example: "excel-tab" or "excel".
    :type dialect: Dialect or str or unicode
    :param encoding: Characters encoding to use to write the CSV file, default: "utf-8".
    :type encoding: str or unicode
    :raise CsvWriteException: Exception raised when a CSV file can't be written.
    """
    with open(csv_path, 'wb') as ostream:
        if not record_list:
            # leave the file empty without header
            return
        writer = UnicodeWriter(ostream, dialect=dialect, encoding=encoding)
        # Tracks the row being processed so it can be reported on failure.
        curr_row = None
        try:
            # Write the header: list of fields. The raw keys are kept for the
            # lookups below; the cells go through to_unicode() like the data
            # cells, so non-string keys do not break the writer.
            header = curr_row = list(record_list[0].keys())
            curr_row = [to_unicode(key) for key in header]
            writer.writerow(curr_row)
            # Write records: list of values
            for record in record_list:
                curr_row = [record.get(key) for key in header]  # same order as header
                curr_row = [to_unicode(value) for value in curr_row]
                writer.writerow(curr_row)
        except (csv.Error, UnicodeEncodeError) as cause:
            raise CsvWriteException(csv_path, curr_row, cause)
答案 1 :(得分:0)
你说你使用西里尔字符。根据定义,它们不在ascii范围内,因此在将它们写入文件之前必须对它们进行编码。假设(根据你的标题)你想使用utf-8编码(也可以使用其他编码,例如cp1251 ...),只需调整你的第一次尝试,进行显式编码:
with open('C:\...result.csv','wb') as f: #I will need the 'a' mode?
    writer = csv.writer(f)
    for key, value in mydict.items():
        # Encode the unicode value explicitly before handing it to csv.writer.
        writer.writerow([key, value.encode('utf8')])
如果只有值是unicode,或
...
writer.writerow([key.encode('utf8'), value.encode('utf8')])
如果键和值都是unicode(你可能知道,我无法判断......)