我有一个utf-8编码的csv文件,想使用csv.DictReader而不是在链接中的类UnicodeReader中使用csv.Reader - https://docs.python.org/2/library/csv.html
class UTF8Recoder:
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
def __init__(self, f, encoding="utf-8"):
f = UTF8Recoder(f, encoding)
self.reader = csv.DictReader(f, delimiter=b',')
def next(self):
row = self.reader.next()
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
def csv_dict_reader(file_obj):
reader = UnicodeReader(file_obj)
for line in reader:
val = line [“first_column”]
if __name__ == "__main__":
try:
with open(“abcd.csv") as f_obj:
csv_dict_reader(f_obj)
except IOError:
print "Error: can\'t find file - tz_2k3.csv or read data"
raise SystemExit
我收到错误" TypeError:列表索引必须是整数,而不是unicode"
答案 0 :(得分:0)
什么是TypeError?
异常TypeError : 将操作或函数应用于不适当类型的对象时引发。关联值是一个字符串,提供有关类型不匹配的详细信息。 Just click on me for more information.
<强>演示:强>
>>> l1 = ["value1", "value2", "value3"]
>>> type(l1)
<type 'list'>
>>> l1[2] # Need index to access item from the list.
'value3'
>>> l1["value1"]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: list indices must be integers, not str
>>>
答案 1 :(得分:0)
我的同事通过修改UnicodeReader类中的next方法修复了这个问题,由于UTF-8 BOM字符存在一些问题
def next(self):
d = self.reader.next()
return {unicode(k, "utf-8").strip(u'\ufeff'): unicode(v, "utf-8").strip(u'\ufeff') for k, v in d.items()}