示例数据:
id, Name, mail, data1, data2, data3
1, Name1, mail@com, abc, 14, de
1, Name1, mail@com, fgh, 25, kl
1, Name1, mail@com, mno, 38, pq
2, Name2, mail@com, abc, 14, d
我写了一个脚本,以第一个字段作为唯一键来清除重复的行。但是,data1–data3 字段中的数据并不重复,因此需要把它们合并起来,得到如下结果:
1,Name1,mail@com,"abc,14,de,fgh,25,kl,mno,38,pq"
如何合并数组中的行? 我的代码不起作用:
import sys
import csv
# You can replace here and choose any delimiter:
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')

# Column layout used by the script: everything before the last four columns
# identifies a record, and the three columns before the final one hold the
# data to merge (the final column is dropped).
# NOTE(review): these slices are carried over unchanged from the earlier
# version of this script -- confirm them against the real input file.
HEAD_COLUMNS = slice(None, -4)
DATA_COLUMNS = slice(-4, -1)


def merge_duplicates(rows):
    """Merge rows that share the same first field into one row per id.

    The head columns are taken from the first row seen for an id.  The data
    columns of every row with that id are collected in input order and
    joined with ``,`` into a single trailing field.  Ids keep the order in
    which they first appear; empty rows are skipped.

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        A list of rows, each ``head + [merged_data]``.
    """
    # Plain dicts keep insertion order (Python 3.7+), which preserves the
    # first-seen order of the ids in the output.
    heads = {}
    data = {}
    for row in rows:
        if not row:
            # Blank line in the input: there is no id to group by.
            continue
        key = row[0]
        if key not in heads:
            heads[key] = row[HEAD_COLUMNS]
            data[key] = []
        # Extend rather than overwrite, so no duplicate's data is lost.
        data[key].extend(row[DATA_COLUMNS])
    return [head + [','.join(data[key])] for key, head in heads.items()]


def main(argv):
    """Read the CSV named by ``argv[1]`` and write the merged rows.

    The output goes to ``'out' + <input name>``.  Files whose name does not
    end in ``csv`` are ignored.

    Raises:
        IndexError: if no file name was given on the command line.
    """
    in_fln = argv[1]
    # Only process files whose name ends in "csv".
    if in_fln[-3:] == "csv":
        out_fln = 'out' + in_fln
        # newline='' lets the csv module handle line endings itself.
        with open(in_fln, 'r', newline='') as inputf:
            outfile = merge_duplicates(csv.reader(inputf, dialect='dlm'))
        with open(out_fln, 'w', newline='') as outputf:
            csv.writer(outputf, dialect='dlm').writerows(outfile)
    print("All done")


if __name__ == "__main__":
    main(sys.argv)
答案 0 :(得分:0)
这应该可以解决问题。
from collections import defaultdict
import pandas as pd
# Rebuild the sample table from the question.
df = pd.DataFrame(
    [
        [1, 'Name1', 'mail@com', 'abc', 14, 'de'],
        [1, 'Name1', 'mail@com', 'fgh', 25, 'kl'],
        [1, 'Name1', 'mail@com', 'mno', 38, 'pq'],
        [2, 'Name2', 'mail@com', 'abc', 14, 'd'],
    ],
    columns=['id', 'Name', 'mail', 'data1', 'data2', 'data3'],
)

# Group the data columns under their (id, Name, mail) identity.
# itertuples(name=None) yields one plain tuple per row; unlike iterrows() it
# does not build a Series for every row.
res = defaultdict(list)
for record in df.itertuples(index=False, name=None):
    # First three columns identify the record, the remaining three are data.
    res[record[:3]].append(record[3:])

for key, value in res.items():
    print(key, value)

# prints one line per key (line order follows dict iteration order, so it
# may differ between Python versions):
# (1, 'Name1', 'mail@com') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
# (2, 'Name2', 'mail@com') [('abc', 14, 'd')]
答案 1 :(得分:0)
我自己的版本,已经非常接近更好的方案:
现在一切正常!
#!/usr/bin/env python3
import csv, re
import os, sys
# You can replace here and choose any delimiter:
dm = ','


def merge_rows(rows):
    """Collapse rows that share the same first field into one row each.

    For every row the three columns ``row[-4]``, ``row[-3]`` and ``row[-2]``
    are joined with ``', '`` into one chunk; the last column is ignored.
    The leading columns ``row[:-4]`` are kept from the first row seen for an
    id, and all chunks of that id are joined with ``','`` into a single
    trailing field.

    Rows are grouped by id wherever they appear in the input, so a duplicate
    that is not adjacent to its first occurrence still ends up in the right
    record.  Ids keep their first-seen order; empty rows are skipped.

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        A list of rows, each ``head + [merged_data]``.

    Raises:
        IndexError: if a non-empty row has fewer than four columns.
    """
    # id -> (head columns, list of data chunks); dicts keep insertion order
    # (Python 3.7+), which preserves the first-seen order of the ids.
    merged = {}
    for row in rows:
        if not row:
            # Blank line in the input: there is no id to group by.
            continue
        chunk = ', '.join((row[-4], row[-3], row[-2]))
        entry = merged.get(row[0])
        if entry is None:
            merged[row[0]] = (row[:-4], [chunk])
        else:
            entry[1].append(chunk)
    return [head + [','.join(chunks)] for head, chunks in merged.values()]


def main(argv):
    """Read the CSV named by ``argv[1]`` and write the merged rows.

    The output file sits next to the input and is named ``'out'`` plus the
    input's base name.  Files whose name does not end in ``csv`` are ignored.

    Raises:
        IndexError: if no file name was given on the command line.
    """
    in_fln = argv[1]
    # Only process files whose name ends in "csv".
    if in_fln.endswith("csv"):
        # Prefix the base name only, so an input given with a directory part
        # does not produce a path into a non-existent directory.
        folder, base = os.path.split(in_fln)
        out_fln = os.path.join(folder, 'out' + base)

        # newline='' lets the csv module handle line endings itself.
        with open(in_fln, 'r', newline='') as src:
            output_rows = merge_rows(csv.reader(src, delimiter=dm, quotechar='"'))

        # Write the merged field through the csv writer so quoting is always
        # valid; rows end with the platform's native line ending.
        with open(out_fln, 'w', newline='') as dst:
            writer = csv.writer(dst, delimiter=dm, quotechar='"',
                                lineterminator=os.linesep)
            writer.writerows(output_rows)
    print("All done")


if __name__ == "__main__":
    main(sys.argv)