# Sample input in column -> {row label -> value} form; it is turned into
# the DataFrame ``df1`` that every aggregation below works on.
data = {"index":{"0":1692,"1":1771,"2":1007,"3":2915,"4":1416},
"item_number":{"0":"123","1":"123","2":"124","3":"124","4":"125"},
"brand":{"0":"brand1","1":"brand1","2":"brand2","3":"brand2","4":"brand3"},
"price":{"0":20.00,"1":20.00,"2":25.00,"3":25.00,"4":30.00},
"comp_id":{"0":1,"1":2,"2":1,"3":3,"4":2},
"comp":{"0":"comp1","1":"comp2","2":"comp1","3":"comp3","4":"comp2"},
"comp_price":{"0":21.00,"1":20.99,"2":16.00,"3":15.99,"4":29.99}}
df1 = pd.DataFrame(data=data)

# g: every row, grouped by brand.
# v: only the rows where price > comp_price (the "violations" counted
#    below), grouped by brand.
g = df1.groupby('brand')
v = df1[df1['price'] > df1['comp_price']].groupby('brand')

# Each metric is a distinct-value count of a single column per brand, so the
# built-in SeriesGroupBy.nunique replaces a Python-level apply(lambda ...).
# dropna=False keeps the counts identical to len(x.unique()), which counts a
# missing value as one distinct entry.

# number of skus within each brand
brand_sku_count = g['item_number'].nunique(dropna=False)
# number of skus violated within each brand
brand_vio_count = v['item_number'].nunique(dropna=False)
# number of sellers within each brand
total_sellers = g['comp_id'].nunique(dropna=False)
# number of violators within each brand
total_violators = v['comp_id'].nunique(dropna=False)

# Align the four Series on the brand index. A brand with no rows in ``v``
# has no entry in the two violation Series, so it gets NaN in those columns.
brand_report = pd.concat(
    [brand_sku_count, brand_vio_count, total_sellers, total_violators],
    axis=1,
)
brand_report.columns = ['sku_count','vio_count','total_comps','total_vios']
以上是我的旧代码。我最近发现了 transform 和 agg 函数,想学习如何避免逐个计算这些指标、再用 concat 把结果拼接在一起。我觉得这里有机会大幅减少代码行数。
我读到过一个问题,其中提到可以像下面这样做:
# Example from another question: selecting one column from the groupby and
# passing a list of aggregation names to agg() applies each of them per brand.
df1.groupby('brand')['item_number'].agg(['sum','count'])
我已经尝试过:
# Attempt 1 (fails with the KeyError reported below): each function indexes
# its argument by column name, as if it received the whole group DataFrame.
# With a dict passed to agg(), each function is instead given only the values
# of the column named by its key, so x['item_number'] is looked up as a row
# label inside that column.
f1 = lambda x: len(x['item_number'].unique())
f2 = lambda x: len(x['comp_id'].unique())
f = {'item_number':f1, 'comp_id':f2}
df1.groupby('brand').agg(f)
返回:
KeyError: 'item_number'
所以我试过了:
# Attempt 2 (fails as reported below): get_group is called on the argument
# each function receives, but that argument is a Series, which has no
# get_group attribute.
f1 = lambda x: len(x.get_group('item_number').unique())
f2 = lambda x: len(x.get_group('comp_id').unique())
f = {'item_number':f1, 'comp_id':f2}
df1.groupby('brand').agg(f)
这会报错,提示 Series 对象没有 get_group 属性。
答案 0 :(得分:2)
试试这个
def f1(x):
    """Return how many distinct values the column ``x`` holds."""
    distinct = x.unique()
    return len(distinct)


# Dict-style agg: each key names a column of df1 and its function receives
# that column's values for one brand at a time, so the same counter can be
# reused for both columns.
f = {'item_number': f1, 'comp_id': f1}
df1.groupby('brand').agg(f)
Out[881]:
item_number comp_id
brand
brand1 1 2
brand2 1 2
brand3 1 1
答案 1 :(得分:0)
数据透视表也可以使用:
WHERE `value` REGEXP '[[:alpha:]]'
AND `value` REGEXP '[[:digit:]]'