我想读取一个 UTF-8 编码的 .csv 文件,并以管道符(|)作为分隔符、用 cp1252(ANSI)编码将其写出。以下代码在 Python 3.6 中可以正常运行,但我需要它在 Python 2.6 中也能工作。然而,Python 2.6 中的 `open` 函数不支持 `encoding` 关键字参数。
import datetime
import csv
# Define what filenames to read
filenames = ["FILE1", "FILE2"]
infilenames = [filename + ".csv" for filename in filenames]
outfilenames = [filename + "_out_.csv" for filename in filenames]

# Convert each comma-separated UTF-8 file into a pipe-separated cp1252 file
# (Python 3 only: relies on the ``encoding`` keyword of the built-in open()).
for infilename, outfilename in zip(infilenames, outfilenames):
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise quoted fields containing newlines are
    # misread and Windows output ends up with "\r\r\n" line endings.
    # The with-statement closes both files even if a row cannot be decoded
    # from UTF-8 or encoded to cp1252.
    with open(infilename, "rt", encoding="utf8", newline="") as infile, \
            open(outfilename, "wt", encoding="cp1252", newline="") as outfile:
        reader = csv.reader(infile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        # QUOTE_NONE never quotes fields; a pipe or quote character inside a
        # field is prefixed with the backslash escape character instead.
        writer = csv.writer(outfile, delimiter='|', quotechar='"',
                            quoting=csv.QUOTE_NONE, escapechar='\\')
        writer.writerows(reader)
我尝试了几种解决方案:
有没有人知道在 Python 2.x 中实现这一点的正确方法?
答案 0 :(得分:0)
这里可能有一些冗余代码,但我通过执行以下操作来实现此目的:
...
import datetime
import csv
# Define what filenames to read
filenames = ["FILE1", "FILE2"]
infilenames = [filename + ".csv" for filename in filenames]
outfilenames = [filename + "_out_.csv" for filename in filenames]
midfilenames = [filename + "_mid_.csv" for filename in filenames]

# Python 2 version: open() has no ``encoding`` keyword there, so the text is
# transcoded by hand and the csv module is fed cp1252 byte strings.
# Nested with-statements are used (rather than one with-statement holding
# several managers) so the script stays valid on Python 2.6.
for infilename, outfilename, midfilename in zip(infilenames, outfilenames, midfilenames):
    # Read the raw bytes, decode them as UTF-8, then re-encode as cp1252.
    # The "ignore" error handler silently drops any character that has no
    # cp1252 equivalent.
    with open(infilename, "rb") as infile:
        infilet = infile.read()
    infilet = infilet.decode("utf-8")
    infilet = infilet.encode("cp1252", "ignore")

    # Write the cp1252-encoded bytes to the intermediate file.
    with open(midfilename, "wb") as midfile:
        midfile.write(infilet)

    # Re-read the intermediate file as CSV and write the pipe-delimited
    # output. The Python 2 csv module expects files opened in binary mode;
    # text mode corrupts line endings on platforms that distinguish the two.
    with open(midfilename, "rb") as midfile:
        with open(outfilename, "wb") as outfile:
            reader = csv.reader(midfile, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            # QUOTE_NONE never quotes fields; special characters inside a
            # field are prefixed with the backslash escape character instead.
            writer = csv.writer(outfile, delimiter='|', quotechar='"',
                                quoting=csv.QUOTE_NONE, escapechar='\\')
            for row in reader:
                writer.writerow(row)
    print("written file",outfilename)