我有一个简单的两列 CSV 文件,需要计算每个键对应的值的平均值。例如,输入的 CSV 如下:
A,2
B,3
A,1
C,2
B,2
D,4
C,2
期望的输出:
{'A': 1.5, 'B': 2.5, 'C': 2, 'D': 4}
到目前为止的代码:
import csv

# Read the two-column CSV and echo every (key, value) pair.
# The context manager closes the file once the loop is done; newline=''
# is the mode the csv module documentation asks for.
with open("data.csv", newline="") as csvfile:
    csv_reader = csv.reader(csvfile, delimiter=',')
    for row in csv_reader:
        # row is a list of strings: row[0] is the key, row[1] the value.
        print(row[0], row[1])
答案 0 :(得分:2)
选项A
使用csv
import collections
import csv


def average_by_key(path):
    """Return a dict mapping each key of a two-column CSV to its mean value.

    Args:
        path: Location of a headerless CSV file whose rows look like
            ``key,value`` with integer values.

    Returns:
        A plain ``dict`` of key -> arithmetic mean (a ``float``), keyed in
        order of first appearance. An empty file yields an empty dict.

    Raises:
        ValueError: If a value cannot be parsed by ``int``.
        IndexError: If a non-blank row has fewer than two columns.
    """
    grouped = collections.defaultdict(list)
    # newline='' leaves newline handling to the csv module.
    with open(path, newline='') as f:
        for line in csv.reader(f):
            if not line:
                # Blank lines come back as empty rows; skip them instead of
                # failing on line[0].
                continue
            grouped[line[0]].append(int(line[1]))
    # Build the averages in a fresh dict rather than overwriting the lists
    # of the dict that is being iterated.
    return {key: sum(values) / len(values) for key, values in grouped.items()}


if __name__ == '__main__':
    out = average_by_key('file.csv')
    print(out)
{'A': 1.5, 'B': 2.5, 'C': 2.0, 'D': 4.0}
选项B
使用pandas
import pandas as pd

# The file has no header row, so the two column names are supplied here.
frame = pd.read_csv('file.csv', header=None, names=['Key', 'Value'])
# Average the remaining column within each distinct 'Key'.
means = frame.groupby('Key').mean()
# Pull out the 'Value' column and print it as a plain {key: mean} dict.
print(means['Value'].to_dict())
{'A': 1.5, 'B': 2.5, 'C': 2.0, 'D': 4.0}
答案 1 :(得分:1)
我认为您可以使用以下代码:
import csv
from collections import OrderedDict


def load_totals(path):
    """Accumulate a running count and sum for every key in a two-column CSV.

    Args:
        path: Location of a headerless CSV file whose rows look like
            ``key,value`` with numeric values.

    Returns:
        An ``OrderedDict`` mapping each key, in order of first appearance,
        to ``{'times': <row count>, 'total': <sum of values as float>}``.

    Raises:
        ValueError: If a non-blank row does not have exactly two columns or
            its value cannot be parsed by ``float``.
    """
    data = OrderedDict()
    # Text mode with newline='' is what csv.reader needs on Python 3; the
    # 'rb' mode used with Python 2 makes the reader fail there.
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # Blank lines come back as empty rows and cannot be unpacked.
                continue
            index, value = row
            # setdefault replaces the removed dict.has_key() check.
            entry = data.setdefault(index, {'times': 0, 'total': 0.0})
            entry['times'] += 1
            entry['total'] += float(value)
    return data


def average(data):
    """Return the mean value for every key of an accumulated-totals mapping.

    Args:
        data: Mapping of key -> ``{'times': count, 'total': sum}``.

    Returns:
        An ``OrderedDict`` of key -> ``total / times``, in the iteration
        order of ``data``.
    """
    results = OrderedDict()
    # .items() replaces the Python 2-only .iteritems().
    for index, values in data.items():
        results[index] = values["total"] / values["times"]
    return results


if __name__ == '__main__':
    data = load_totals('data.csv')
    print(average(data))
OrderedDict([('A', 1.5), ('B', 2.5), ('C', 2.0), ('D', 4.0)])
HTH