数据
import pandas as pd

# Sample order lines in column -> {row label -> value} form (the shape produced by
# DataFrame.to_dict()); each row is one SKU line of an order.
data = {"account":{"0":383080,"1":383080,"2":383080,"3":412290,"4":412290,"5":412290,"6":412290,"7":412290,"8":218895,"9":218895,"10":218895,"11":218895},"name":{"0":"Will LLC","1":"Will LLC","2":"Will LLC","3":"Jerde-Hilpert","4":"Jerde-Hilpert","5":"Jerde-Hilpert","6":"Jerde-Hilpert","7":"Jerde-Hilpert","8":"Kulas Inc","9":"Kulas Inc","10":"Kulas Inc","11":"Kulas Inc"},"order":{"0":10001,"1":10001,"2":10001,"3":10005,"4":10005,"5":10005,"6":10005,"7":10005,"8":10006,"9":10006,"10":10006,"11":10006},"sku":{"0":"B1-20000","1":"S1-27722","2":"B1-86481","3":"S1-06532","4":"S1-82801","5":"S1-06532","6":"S1-47412","7":"S1-27722","8":"S1-27722","9":"B1-33087","10":"B1-33364","11":"B1-20000"},"quantity":{"0":7,"1":11,"2":3,"3":48,"4":21,"5":9,"6":44,"7":36,"8":32,"9":23,"10":3,"11":-1},"unit price":{"0":33.69,"1":21.12,"2":35.99,"3":55.82,"4":13.62,"5":92.55,"6":78.91,"7":25.42,"8":95.66,"9":22.55,"10":72.3,"11":72.18},"ext price":{"0":235.83,"1":232.32,"2":107.97,"3":2679.36,"4":286.02,"5":832.95,"6":3472.04,"7":915.12,"8":3061.12,"9":518.65,"10":216.9,"11":72.18}}
# Bind the frame to `df`: the snippets that follow all operate on a variable of that name,
# so discarding the constructor's result would leave them with a NameError.
df = pd.DataFrame(data=data)
当前解决方案
# Total ext price for every (order, sku) pair, flattened back into ordinary columns.
per_sku = df.groupby(['order', 'sku'])['ext price'].sum()
sku_total = per_sku.rename('sku total').reset_index()
# Total ext price per order; mapping it over the 'order' column looks up the
# matching order total for each row, so the division is row-by-row.
order_total = df.groupby('order')['ext price'].sum()
sku_total['sku total'] / sku_total['order'].map(order_total)
问题
如何做除法:
# Numerator: ext price summed per (order, sku) pair.
df.groupby(['order','sku'])['ext price'].sum()
除以
# Denominator: ext price summed per order.
df.groupby('order')['ext price'].sum()
并且无需使用 reset_index?
答案 0 :(得分:2)
IIUC(如果我理解正确),
我们可以使用 transform
,它在执行分组运算的同时保留原始 DataFrame 的索引,因此两个结果可以直接逐行相除:
得到结果后,您可以根据需要将其赋值给一个新列。
# transform('sum') returns each group's total aligned to df's own index,
# so the two operands line up row-for-row and divide element-wise.
sku_sum = df.groupby(['order', 'sku'])['ext price'].transform('sum')
order_sum = df.groupby('order')['ext price'].transform('sum')
s = sku_sum / order_sum
print(s)
0 0.409342
1 0.403249
2 0.187409
3 0.429090
4 0.034942
5 0.429090
6 0.424170
7 0.111798
8 0.791222
9 0.134058
10 0.056063
11 0.018657
答案 1 :(得分:2)
div
难道不能解决这个问题吗?还是我哪里理解得不对?
import pandas as pd
import numpy as np
# Sample order lines in column -> {row label -> value} form; one row per SKU line.
data = {
    "account": {
        "0": 383080, "1": 383080, "2": 383080,
        "3": 412290, "4": 412290, "5": 412290, "6": 412290, "7": 412290,
        "8": 218895, "9": 218895, "10": 218895, "11": 218895,
    },
    "name": {
        "0": "Will LLC", "1": "Will LLC", "2": "Will LLC",
        "3": "Jerde-Hilpert", "4": "Jerde-Hilpert", "5": "Jerde-Hilpert",
        "6": "Jerde-Hilpert", "7": "Jerde-Hilpert",
        "8": "Kulas Inc", "9": "Kulas Inc", "10": "Kulas Inc", "11": "Kulas Inc",
    },
    "order": {
        "0": 10001, "1": 10001, "2": 10001,
        "3": 10005, "4": 10005, "5": 10005, "6": 10005, "7": 10005,
        "8": 10006, "9": 10006, "10": 10006, "11": 10006,
    },
    "sku": {
        "0": "B1-20000", "1": "S1-27722", "2": "B1-86481",
        "3": "S1-06532", "4": "S1-82801", "5": "S1-06532",
        "6": "S1-47412", "7": "S1-27722",
        "8": "S1-27722", "9": "B1-33087", "10": "B1-33364", "11": "B1-20000",
    },
    "quantity": {
        "0": 7, "1": 11, "2": 3,
        "3": 48, "4": 21, "5": 9, "6": 44, "7": 36,
        "8": 32, "9": 23, "10": 3, "11": -1,
    },
    "unit price": {
        "0": 33.69, "1": 21.12, "2": 35.99,
        "3": 55.82, "4": 13.62, "5": 92.55, "6": 78.91, "7": 25.42,
        "8": 95.66, "9": 22.55, "10": 72.3, "11": 72.18,
    },
    "ext price": {
        "0": 235.83, "1": 232.32, "2": 107.97,
        "3": 2679.36, "4": 286.02, "5": 832.95, "6": 3472.04, "7": 915.12,
        "8": 3061.12, "9": 518.65, "10": 216.9, "11": 72.18,
    },
}
df = pd.DataFrame(data)
print(df)

# Numerator: ext price summed per (order, sku); the result is indexed by both keys.
df_1 = df.groupby(['order', 'sku'])['ext price'].sum()
# Denominator: ext price summed per order; indexed by 'order' only.
df_2 = df.groupby('order')['ext price'].sum()
# div() aligns the two Series on their indexes before dividing, so each
# (order, sku) total is divided by the total of its own order -- no reset_index needed.
df_res = df_1.div(df_2)
print(df_res)
输出:
order sku
10001 B1-20000 0.409342
B1-86481 0.187409
S1-27722 0.403249
10005 S1-06532 0.429090
S1-27722 0.111798
S1-47412 0.424170
S1-82801 0.034942
10006 B1-20000 0.018657
B1-33087 0.134058
B1-33364 0.056063
S1-27722 0.791222
Name: ext price, dtype: float64