我在初始输入csv文件中读取并通过以下方式进行修改:
但是,我并不需要 test1.txt 这样的中间文件,只想直接从输入文件(csv_sample.txt)得到 test2.csv(最终结果)。如何用尽量少的代码解决这个问题?我想用 pandas 也许会更好,但我对它不熟悉。
另外,如果我想把这段代码应用到其他 csv 输入文件(格式相同,但输入数量不同),能否完全不修改代码内部?
这是我到目前为止所做的:
import csv
# Step 1: keep only the useful columns of csv_sample.txt and only the rows
# whose first kept column starts with "/a_", then write them to test1.txt.
useful_cols = {'out_gate', 'in_gate', 'n_con'}

# newline='' is what the csv module expects for files it reads or writes;
# without it the writer can emit an extra blank line per row on Windows.
with open("./csv_sample.txt", newline='') as infile:
    reader = csv.reader(infile)
    header = next(reader)
    # csv.reader yields [] for blank lines; drop them so indexing is safe.
    data = [row for row in reader if row]

# Column positions are looked up from the header names, so the columns may
# sit at different indices in other input files.
selected = [(i, col) for i, col in enumerate(header) if col in useful_cols]
if not selected:
    raise ValueError(
        "none of the expected columns %s found in header %r"
        % (sorted(useful_cols), header)
    )
col_num, new_header = zip(*selected)

new_data = [[row[i] for i in col_num] for row in data]
# row[0] is the first kept column (out_gate in the sample file).
new_data = [row for row in new_data if row[0].startswith("/a_")]

with open("./test1.txt", "w", newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(new_header)
    writer.writerows(new_data)
import csv
from collections import defaultdict
# Step 2: read (out_gate, in_gate, n_con) rows from test1.txt, sum the counts
# per (out_gate, in_gate) pair, and write a symmetric matrix to test2.csv.
d = defaultdict(lambda: defaultdict(int))

with open("./test1.txt", newline='') as in_file:
    csv_reader = csv.reader(in_file)
    next(csv_reader, None)  # skip the header; tolerate an empty file
    for row in csv_reader:
        if len(row) >= 3:
            # Slice so rows with extra trailing fields do not break the
            # three-way unpacking.
            x, y, count = row[:3]
            d[x][y] += int(count)

# Only gates that appear in the first column become rows/columns of the
# matrix; d[x][y] yields 0 for pairs that were never seen.
keys = sorted(d)
size = len(keys)
new_data0 = [[d[x][y] for y in keys] for x in keys]

# Off-diagonal cells combine both directions (x->y plus y->x); diagonal
# cells keep their own count. Using `size` avoids indexing new_data0[0],
# which fails when no rows were read.
new_data1 = [
    [
        new_data0[r][c] if r == c else new_data0[r][c] + new_data0[c][r]
        for c in range(size)
    ]
    for r in range(size)
]
new_data2 = [[key] + cells for key, cells in zip(keys, new_data1)]

# The top-left corner cell of the table is a single space.
keys.insert(0, ' ')
with open("test2.csv", "w", newline='') as outfile1:
    writer = csv.writer(outfile1)
    writer.writerow(keys)
    writer.writerows(new_data2)
[ csv_sample.txt ]
out_gate,uless_col,in_gate,n_con
p,x,x,1
p,x,/a_y,1
/a_a,x,z,1
/a_a,s,/a_a,3
/a_a,u,/a_b,1
/a_a,s,/a_b,3
/a_b,e,/a_a,2
/a_b,s,/a_b,2
/a_b,l,/a_c,4
/a_c,e,/a_a,5
/a_c,s,/a_b,5
/a_c,s,/a_b,3
/a_c,c,/a_a,4
/a_d,o,/a_c,2
/a_d,l,/a_c,3
/a_d,m,/a_b,2
p,y,/a_x,1
p,y,y,1
p,y,z,3
[ test1.txt ]
out_gate,in_gate,n_con
/a_a,/a_b,1
/a_a,/a_b,3
/a_b,/a_a,2
/a_b,/a_c,4
/a_c,/a_a,5
/a_c,/a_b,5
/a_c,/a_b,3
/a_c,/a_a,4
/a_d,/a_c,2
/a_d,/a_c,3
/a_d,/a_b,2
[test2.csv (shown as excel)]
/a_a /a_b /a_c /a_d
/a_a 3 6 9 0
/a_b 6 2 12 2
/a_c 9 12 0 5
/a_d 0 2 5 0
答案 0 :(得分:0)
您可以在读取 csv_sample.txt 的同时直接创建 d:
from collections import defaultdict
from operator import itemgetter
import csv
# Build the symmetric connection matrix straight from csv_sample.txt,
# without writing an intermediate file.
keep_cols = itemgetter(0, 2, 3)  # out_gate, in_gate, n_con in the sample file
d = defaultdict(lambda: defaultdict(int))

with open('csv_sample.txt', newline='') as f_input:
    csv_input = csv.reader(f_input)
    next(csv_input, None)  # skip the header; tolerate an empty file
    for row in csv_input:
        # csv.reader yields [] for blank lines, so check the row is
        # non-empty before looking at its first field.
        if row and row[0].startswith('/a_'):
            x, y, count = keep_cols(row)
            d[x][y] += int(count)

# Only gates that appear in the first column become rows/columns of the
# matrix; d[x][y] yields 0 for pairs that were never seen.
keys = sorted(d)
size = len(keys)
new_data0 = [[d[x][y] for y in keys] for x in keys]

# Off-diagonal cells combine both directions (x->y plus y->x); diagonal
# cells keep their own count. Using `size` avoids indexing new_data0[0],
# which fails when no row matched the "/a_" filter.
new_data1 = [
    [
        new_data0[r][c] if r == c else new_data0[r][c] + new_data0[c][r]
        for c in range(size)
    ]
    for r in range(size)
]
new_data2 = [[key] + cells for key, cells in zip(keys, new_data1)]

# The top-left corner cell of the table is a single space.
keys.insert(0, ' ')
with open('test2.csv', 'w', newline='') as f_output:
    csv_output = csv.writer(f_output)
    csv_output.writerow(keys)
    csv_output.writerows(new_data2)
因此,没有必要编写文本文件。