如何在 Python 3 中合并 CSV 文件里 id 相同的行(数组)

时间:2016-08-18 19:42:15

标签: python arrays csv

示例数据:

id, Name, mail, data1, data2, data3 
1, Name1, mail@com, abc, 14, de 
1, Name1, mail@com, fgh, 25, kl 
1, Name1, mail@com, mno, 38, pq 
2, Name2, mail@com, abc, 14, d

我写了一个脚本,把第一个字段当作唯一标识来清除重复的行。但是这些重复行中 data1–data3 字段的数据各不相同,不能直接丢弃,需要把它们合并到同一行,得到如下结果:

  

1,Name1,mail@com,"abc,14,de,fgh,25,kl,mno,38,pq"

如何合并数组中的行? 我的代码不起作用:

import sys
import csv

# Script from the question: read the CSV named on the command line, keep one
# output row per distinct value of the first column, and (attempt to) collect
# the trailing data columns of the duplicate rows.
in_fln = sys.argv[1]
# Two named dialects are registered; change the delimiter here if needed.
# Only 'dlm' (comma) is used below; 'dmt' (semicolon) is registered but unused.
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')
# Only process the input when its name ends in "csv".
if (in_fln[-3:]) == "csv":
    # Output file name: the input name prefixed with 'out'.
    out_fln = 'out' + in_fln
    inputf = open(in_fln, 'r')
    # seen:    first-column values already encountered
    # outfile: rows that are eventually written to out_fln
    # nout:    first-column value -> the slice row[-4:-1] of a row with that value
    seen = []
    outfile = []
    nout = {}
    #rowun = []
    try:
        reader = csv.reader(inputf, dialect='dlm')
        # Select by ContactID, i.e. the first column of each row.
        for row in reader:
            if row[0] not in seen:
                # First row for this id: keep its leading columns for the output
                # and remember its trailing slice in nout.
                # Earlier attempt (worked, temporarily commented out):
                #rowun = '"' + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2])  + '"'
                #outfile.append(row[:-5]+[rowun])
                outfile.append(row[:-4])
                rowun = (row[0])
                nout[rowun] = (row[-4:-1])
                seen.append(row[0])
                # Debug output.
                print (type(row))
            else:
                # Repeated id: nothing is appended to outfile in this branch.
                # Earlier attempts, left commented out:
                #rowun = '"'  + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2])  + '"'              
                #nout.insert(-1,(row[-4:-1]))
                # Debug output.
                print (type(row))
                rowun = (row[0])
                rowun2 = {rowun:(row[-4:-1])}
                # NOTE(review): dict.update() replaces the value stored under this
                # id instead of extending it, so only the slice of the last
                # duplicate row survives in nout.
                nout.update(rowun2)


    finally:
        # NOTE(review): this runs even when reading raised. Only `outfile` is
        # written; `nout` never reaches the output file (the line that wrote it
        # is commented out), so the collected data columns are lost.
        #print (nout)
        #print (outfile[:-1])
        #csv.writer(open(('nout' + in_fln), 'w', newline='')).writerows(nout)
        # The output file object created here is never explicitly closed.
        csv.writer(open(out_fln, 'w', newline=''), dialect='dlm').writerows(outfile)
        inputf.close()
        print ("All done")

2 个答案:

答案 0 :(得分:0)

这应该可以解决问题。

from collections import defaultdict
import pandas as pd


# Sample rows from the question, rebuilt as a DataFrame.
df = pd.DataFrame(
    data=[
        [1, 'Name1', 'mail@com', 'abc', 14, 'de'],
        [1, 'Name1', 'mail@com', 'fgh', 25, 'kl'],
        [1, 'Name1', 'mail@com', 'mno', 38, 'pq'],
        [2, 'Name2', 'mail@com', 'abc', 14, 'd'],
    ],
    columns=['id', 'Name', 'mail', 'data1', 'data2', 'data3'],
)

# Maps every (id, Name, mail) triple to the list of (data1, data2, data3)
# triples found on the rows that share it.
res = defaultdict(list)

for _, record in df.iterrows():
    owner = tuple(record[col] for col in ('id', 'Name', 'mail'))
    payload = tuple(record[col] for col in ('data1', 'data2', 'data3'))
    res[owner].append(payload)

for owner, payloads in res.items():
    print(owner, payloads)

# Prints one line per distinct key. On Python 3.7+ dictionaries keep insertion
# order, so the keys appear in order of first occurrence (older interpreters
# may print them in a different order):
# (1, 'Name1', 'mail@com') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
# (2, 'Name2', 'mail@com') [('abc', 14, 'd')]

答案 1 :(得分:0)

我自己的版本已经非常接近了,改进之后效果更好:

现在一切正常!

#!/usr/bin/env python3
import csv, re
import os, sys
# Field delimiter used for both reading and writing; change it here if needed.
dm = ','


def merge_rows(rows):
    """Collapse rows that share the same first field into one row each.

    For every row, the fields ``row[:-4]`` are treated as the record's key
    columns and the three fields ``row[-4:-1]`` as one data group (the last
    field of the row is ignored).  The first row seen for an id supplies the
    key columns; the data groups of all rows with that id are concatenated
    into a single extra field.  Fields inside a group are joined with ', '
    and groups are joined with ','.

    Rows with the same id do not have to be adjacent.  Empty rows (blank
    lines in the CSV) are skipped.

    Args:
        rows: iterable of rows, each a list of strings (e.g. a csv.reader).

    Returns:
        A list of merged rows, ordered by first appearance of each id.

    Raises:
        ValueError: if a non-empty row has fewer than four fields.
    """
    # id -> (key columns, list of data groups); dicts preserve insertion
    # order on Python 3.7+, which keeps the output in input order.
    merged = {}
    for line_no, row in enumerate(rows, start=1):
        if not row:
            continue
        if len(row) < 4:
            raise ValueError(
                "row %d has %d field(s), at least 4 are required"
                % (line_no, len(row))
            )
        group = ', '.join(row[-4:-1])
        entry = merged.get(row[0])
        if entry is None:
            merged[row[0]] = (row[:-4], [group])
        else:
            entry[1].append(group)
    return [head + [','.join(groups)] for head, groups in merged.values()]


def main(argv):
    """Merge duplicate ids of the CSV file named in ``argv[1]``.

    The result is written next to the input file, under the same name
    prefixed with 'out'.

    Args:
        argv: command-line argument list; ``argv[1]`` must end in "csv".

    Returns:
        0 on success, 1 when the argument is missing or is not a csv file.
    """
    if len(argv) < 2 or not argv[1].endswith("csv"):
        print("usage: %s <input>.csv" % (argv[0] if argv else "script"),
              file=sys.stderr)
        return 1

    in_fln = argv[1]
    # Prefix only the file name so that inputs given with a directory part
    # ('data/x.csv') produce 'data/outx.csv' rather than 'outdata/x.csv'.
    folder, base = os.path.split(in_fln)
    out_fln = os.path.join(folder, 'out' + base)

    with open(in_fln, 'r', newline='') as src:
        output_rows = merge_rows(csv.reader(src, delimiter=dm, quotechar='"'))

    # newline='' lets the csv module control line endings itself; the
    # platform line separator is requested so the result is a plain text
    # file for the current OS.
    with open(out_fln, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter=dm, quotechar='"',
                            lineterminator=os.linesep)
        writer.writerows(output_rows)

    print("All done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))