我正在尝试解析一个csv文件,但似乎遗漏了一些基本的东西,无法让它正常工作。csv的每一行都包含一个用{}括起来的字符串,其中有若干个顺序随机的参数,如下例所示。
Timestamp,Session Index,Event,Description,Version,Platform,Device,User ID,Params,
"Dec 03, 2014 01:30 AM",1,NoRegister,,1.4.0,iPhone,Apple iPhone 5s (GSM),,{},
"Dec 03, 2014 01:30 AM",2,HomeTab,Which tab the user viewed ,1.4.0,iPhone,Apple iPhone 5s (GSM),,{ UserID : 36875; tabName : QuickAndEasy},
"Dec 03, 2014 01:30 AM",3,UserRecipeOverview,How many users go to Overview of a recipe?,1.4.0,iPhone,Apple iPhone 5s (GSM),,{ RecipeID : 1488; UserID : 36875},
我的代码如下,但是我收到一个我不理解的错误:
# Question's original attempt: read test.csv and build, for each row, a dict
# of the "key : value" pairs found between the braces.
# NOTE(review): block indentation was lost in this listing.
# Running row number; used as the key into mappedLines.
counter = 0
# Maps row number -> dict of parameter name to value for that row.
mappedLines = {}
import csv
with open ('test.csv', 'r') as f:
reader = csv.reader (f)
for line in reader:
counter = counter + 1
lineDict = {}
# NOTE: csv.reader yields each row as a list of cells, not a string, so the
# .replace() call below raises the AttributeError shown in the traceback.
line = line.replace("{","")
line = line.replace("}","")
line = line.strip()
# Pairs are separated by ';' and each pair is "key : value".
fieldPairs = line.split(";")
for pair in fieldPairs:
fields = pair.split(":")
key = fields[0].strip()
value = fields[1].strip()
lineDict[key] = value
mappedLines[counter] = lineDict
def printFields(keys, lineSets):
    """Print the values for ``keys`` from ``lineSets`` as one comma-separated line.

    keys: iterable of field names, in output column order.
    lineSets: dict mapping a field name to its string value; a name that
        is missing from it produces an empty column.
    """
    # join() places commas only between columns, so there is no trailing
    # comma to slice off afterwards. The call form of print with a single
    # argument behaves the same on Python 2 and Python 3.
    print(",".join(lineSets.get(key, "") for key in keys))
# Parameter names to print, in output column order.
fields = ["UserID", "tabName", "RecipeID", "type", "searchWord", "isFromLabel"]
# Print one comma-separated line per parsed row, in ascending row-number
# order. Iterating the sorted keys visits the same rows as counting from 1
# to len(mappedLines) without assuming the keys are gap-free.
for key in sorted(mappedLines):
    printFields(fields, mappedLines[key])
这是回溯(traceback)信息:
Traceback (most recent call last):
File "testV3.py", line 14, in <module>
line = line.replace("{","")
AttributeError: 'list' object has no attribute 'replace'
编辑:
我现在正在尝试使用write函数将输出保存到新的csv文件中,如下所示。 csv仅记录标题和列。
import csv
def printfields(keys, linesets):
    """Print the values for ``keys`` from ``linesets`` on one line.

    Every column, including the last one, is followed by a comma.

    keys: iterable of field names, in output column order.
    linesets: dict mapping a field name to its string value; a name that
        is missing from it produces an empty column.
    """
    # The call form of print with a single argument behaves the same on
    # Python 2 and Python 3.
    print("".join(linesets.get(key, "") + "," for key in keys))
def csv_writer(reader, path):
    """Write every row produced by ``reader`` to a new CSV file at ``path``.

    reader: iterable of rows; each row is a sequence of cell values
        (for example a csv.reader, or a list of lists).
    path: name of the output file; an existing file is overwritten.
    """
    # NOTE: on Python 3 the csv docs recommend open(path, "w", newline="")
    # so that no blank lines appear between rows on Windows; plain "w" is
    # used here because it is accepted by both Python 2 and Python 3.
    with open(path, "w") as csv_file:
        writer = csv.writer(csv_file, delimiter=",")
        # Iterate the argument itself rather than a module-level variable.
        for row in reader:
            writer.writerow(row)
if __name__ == "__main__":
    # Parameter names to emit, in output column order.
    fields = [
        "UserID", "tabName", "RecipeID", "type", "searchWord", "isFromLabel", "targetUID"
    ]
    # Maps the reader's line number to the {name: value} dict parsed from
    # that row's Params cell.
    mappedLines = {}
    with open('test.csv', 'r') as f:
        reader = csv.DictReader(f)
        for line in reader:
            # DictReader fills missing cells of a short row with None.
            params = (line['Params'] or '').strip().strip('{}').strip()
            lineDict = {}
            for pair in params.split(';'):
                # Split on the first ':' so that "key:value" works with or
                # without surrounding spaces; skip fragments without ':'.
                name, sep, value = pair.partition(':')
                if sep:
                    lineDict[name.strip()] = value.strip()
            mappedLines[reader.line_num] = lineDict
    path = "output.csv"
    # The reader is exhausted and its file is closed at this point, so the
    # rows to save are rebuilt from the parsed data: a header row followed
    # by one row per input line, with empty cells for absent parameters.
    rows = [fields]
    for key in sorted(mappedLines):
        rows.append([mappedLines[key].get(name, "") for name in fields])
    csv_writer(rows, path)
    for key in sorted(mappedLines):
        printfields(fields, mappedLines[key])
答案 0 :(得分:1)
`line` 是一个列表,包含当前行中的各个单元格。要访问其中的某个单元格,请使用循环:
for cell in line:
    # str.replace returns a new string and leaves `cell` unchanged, so
    # store the result if the cleaned value is needed.
    cell.replace(...)
答案 1 :(得分:0)
您可以使用以下语句删除字符串列表中的"{"和"}":
line = ".".join(line).replace("{","").replace("}","").split(",")
答案 2 :(得分:0)
我重新整理了您的代码并做了一些修改。改动在于:它改用csv.DictReader,不再使用计数器变量,也不再在for循环中使用range函数。
import csv
def printFields(keys, lineSets):
    """Print the values for ``keys`` from ``lineSets`` on one line.

    Every column, including the last one, is followed by a comma.

    keys: iterable of field names, in output column order.
    lineSets: dict mapping a field name to its string value; a name that
        is missing from it produces an empty column.
    """
    # The call form of print with a single argument behaves the same on
    # Python 2 and Python 3.
    print("".join(lineSets.get(key, "") + "," for key in keys))
if __name__ == "__main__":
    # Parameter names to print, in output column order.
    fields = [
        "UserID", "tabName", "RecipeID", "type", "searchWord", "isFromLabel"
    ]
    # Maps the reader's line number to the {name: value} dict parsed from
    # that row's Params cell.
    mappedLines = {}
    with open('test.csv', 'r') as f:
        reader = csv.DictReader(f)
        for line in reader:
            # DictReader fills missing cells of a short row with None.
            params = (line['Params'] or '').strip().strip('{}').strip()
            lineDict = {}
            for pair in params.split(';'):
                # Split on the first ':' so that "key:value" works with or
                # without surrounding spaces; skip fragments without ':'.
                name, sep, value = pair.partition(':')
                if sep:
                    lineDict[name.strip()] = value.strip()
            mappedLines[reader.line_num] = lineDict
    # Print one line per input row, in ascending line-number order.
    for key in sorted(mappedLines):
        printFields(fields, mappedLines[key])