首先,生成一个简单的 DataFrame,然后用 groupby 按(字母, 数字)分组,并用 count 统计每组出现的次数。
import pandas as pd
import random
# Candidate labels: three letters, and the digits 1-5 kept as strings.
letters = list('abc')
nums = list(map(str, range(1, 6)))

# Ten random [letter, digit] pairs; within each pair the letter is drawn
# before the digit.
combs = [
    [random.choice(pool) for pool in (letters, nums)]
    for _ in range(10)
]

# Two unnamed columns: 0 holds the letter, 1 holds the digit string.
combs_df = pd.DataFrame(combs)
combs_df
"""
0 1
0 c 3
1 a 4
2 b 1
3 a 2
4 a 2
5 c 1
6 a 3
7 c 1
8 c 2
9 b 1
"""
# Group the rows by (column 0, column 1) and count the entries of column 1
# in each group, i.e. how often every (letter, digit) pair occurs.
pair_groups = combs_df.groupby(by=[0, 1])
s1 = pair_groups[1].count()
type(s1)
# -> pandas.core.series.Series
s1.index
"""
MultiIndex(levels=[['a', 'b', 'c'], ['1', '2', '3', '4']],
codes=[[0, 0, 0, 1, 2, 2, 2], [1, 2, 3, 0, 0, 1, 2]],
names=[0, 1])
"""
s1
"""
0 1<-----column 1
a 2 2
3 1<-- count
4 1 ...
b 1 2
c 1 2
2 1
3 1
Name: 1, dtype: int64
"""
我知道如何用 Python 的 for 循环把上面的 Series 转换成下面这种表格(行是字母、列是数字、值是计数):
"""
1 2 3 4 <-- nums
letters--> a 0 2 1 1
b 2 0 0 0 <-- count
c 2 1 1 0
"""
答案 0 :(得分:3)
# Flatten the grouped counts into a regular DataFrame with the columns
# 0 (letter), 1 (digit) and 'count'.
s1 = s1.reset_index(name='count')
# Spread the digits across the columns, one row per letter. Letter/digit
# pairs that never occurred come back from the pivot as NaN, which forces a
# float dtype; fill them with 0 and cast back to int so the table holds
# whole-number counts (1 instead of 1.0), as in the requested layout.
# NOTE: calling `s1.unstack(fill_value=0)` on the original Series (before the
# reset above) should give the same integer table in one step.
s2 = pd.pivot(s1, index=0, columns=1, values='count').fillna(0).astype(int)
print(s2)
输出如下(数据是随机生成的,具体数值每次运行都会不同):
1 1 2 3 4 5
0
a 0.0 0.0 1.0 0.0 1.0
b 1.0 2.0 1.0 0.0 0.0
c 2.0 0.0 1.0 1.0 0.0