Python PANDAS获得多个数据片段的平均值

时间:2017-08-14 22:37:50

标签: python pandas

考虑以下数据集:

import pandas as pd

data = {"store_name":{"0":"StoreName","1":"StoreName","2":"StoreName","3":"StoreName","4":"StoreName",
                      "5":"StoreName","6":"StoreName","7":"StoreName","8":"StoreName","9":"StoreName",
                      "10":"StoreName","11":"StoreName","12":"StoreName","13":"StoreName","14":"StoreName",
                      "15":"StoreName","16":"StoreName","17":"StoreName","18":"StoreName","19":"StoreName",
                      "20":"StoreName","21":"StoreName","22":"StoreName","23":"StoreName","24":"StoreName",
                      "25":"StoreName","26":"StoreName","27":"StoreName","28":"StoreName","29":"StoreName",
                      "30":"StoreName","31":"StoreName","32":"StoreName","33":"StoreName","34":"StoreName",
                      "35":"StoreName","36":"StoreName","37":"StoreName","38":"StoreName","39":"StoreName",
                      "40":"StoreName","41":"StoreName","42":"StoreName","43":"StoreName","44":"StoreName",
                      "45":"StoreName","46":"StoreName","47":"StoreName","48":"StoreName","49":"StoreName"},
        "category":{"0":"Facial Care","1":"Food","2":"Food","3":"Food","4":"Food","5":"Food",
                    "6":"Soap & Bath","7":"Soap & Bath","8":"Facial Care","9":"Condiments & Sauces",
                    "10":"Yoga & Home Fitness","11":"Yoga & Home Fitness","12":"Honey & Sweeteners",
                    "13":"Honey & Sweeteners","14":"Honey & Sweeteners","15":"Honey & Sweeteners",
                    "16":"Honey & Sweeteners","17":"Honey & Sweeteners","18":"Honey & Sweeteners",
                    "19":"Honey & Sweeteners","20":"Honey & Sweeteners","21":"Honey & Sweeteners",
                    "22":"Candies Desserts & Toppings","23":"Honey & Sweeteners","24":"Honey & Sweeteners",
                    "25":"Honey & Sweeteners","26":"Dog","27":"Dog","28":"Cat","29":"Cat",
                    "30":"Cooking & Meal Ingredients","31":"Snacks","32":"Snacks","33":"Cooking & Meal Ingredients",
                    "34":"Cooking & Meal Ingredients","35":"Cooking & Meal Ingredients","36":"Bars, Cereals & Granolas",
                    "37":"Bars, Cereals & Granolas","38":"Candies Desserts & Toppings","39":"Cooking & Meal Ingredients",
                    "40":"Cooking & Meal Ingredients","41":"Snacks","42":"Snacks","43":"Cooking & Meal Ingredients",
                    "44":"Cooking & Meal Ingredients","45":"Cooking & Meal Ingredients","46":"Cooking & Meal Ingredients",
                    "47":"Snacks","48":"Cooking & Meal Ingredients","49":"Sun & Bug"},
        "brand":{"0":"Brand1","1":"Brand2","2":"Brand2","3":"Brand2","4":"Brand2","5":"Brand2",
                 "6":"Brand3","7":"Brand3","8":"Brand3","9":"Brand4","10":"Brand5","11":"Brand5",
                 "12":"ƒBrand6","13":"Brand6","14":"Brand6","15":"Brand6","16":"Brand6","17":"Brand6",
                 "18":"Brand6","19":"Brand6","20":"Brand6","21":"Brand6","22":"Brand6","23":"Brand6",
                 "24":"Brand6","25":"Brand6","26":"Zuke\'s","27":"Zuke\'s","28":"Zuke\'s","29":"Zuke\'s",
                 "30":"Brand8","31":"Brand8","32":"Brand8","33":"Brand8","34":"Brand8","35":"Brand8",
                 "36":"Brand8","37":"Brand8","38":"Brand8","39":"Brand8","40":"Brand8","41":"Brand8",
                 "42":"Brand8","43":"Brand8","44":"Brand8","45":"Brand8","46":"Brand8","47":"Brand8",
                 "48":"Brand8","49":"Brand7"},
        "store_price":{"0":4.49,"1":14.45,"2":13.49,"3":14.29,"4":13.99,"5":13.99,"6":2.65,"7":3.45,
                       "8":3.95,"9":3.75,"10":3.65,"11":6.95,"12":10.75,"13":10.75,"14":4.65,
                       "15":5.69,"16":3.95,"17":6.45,"18":3.45,"19":4.95,"20":4.45,"21":4.45,
                       "22":3.79,"23":4.95,"24":7.45,"25":7.49,"26":4.99,"27":4.99,"28":2.29,
                       "29":2.95,"30":1.89,"31":3.25,"32":3.25,"33":2.99,"34":2.99,"35":2.99,
                       "36":5.25,"37":5.25,"38":3.25,"39":2.25,"40":2.89,"41":4.25,"42":4.25,
                       "43":2.25,"44":2.05,"45":1.89,"46":2.49,"47":4.25,"48":2.49,"49":4.95},
         "Comp1":{"0": None,"1":15.9,"2":13.7,"3":15.9,"4":14.59,"5":13.99,"6": None,"7": None,"8": None,"9": None,
                  "10":10.0,"11":20.0,"12":19.69,"13":20.8,"14": None,"15": None,"16": None,"17":6.18,"18": None,"19": None,
                  "20": None,"21": None,"22":5.99,"23": None,"24": None,"25":18.99,"26":6.39,"27":6.39,"28": None,"29": None,
                  "30": None,"31": None,"32": None,"33": None,"34": None,"35": None,"36": None,"37": None,"38": None,"39": None,"40": None,
                  "41": None,"42": None,"43": None,"44": None,"45": None,"46": None,"47": None,"48": None,"49":7.19},
         "Comp5":{"0":6.72,"1": None,"2": None,"3": None,"4": None,"5": None,"6": None,"7": None,"8":5.79,"9": None,
                  "10": None,"11": None,"12":10.55,"13":11.4,"14": None,"15":8.27,"16":5.01,"17": None,"18": None,
                  "19":4.71,"20": None,"21":5.49,"22": None,"23": None,"24": None,"25": None,"26": None,"27":6.46,
                  "28": None,"29": None,"30": None,"31":3.94,"32": None,"33": None,"34": None,"35": None,"36":5.0,"37":5.0,
                  "38": None,"39": None,"40":4.31,"41":4.7,"42": None,"43": None,"44": None,"45": None,"46": None,"47":4.64,
                  "48": None,"49": None},
         "Comp4":{"0":4.49,"1": None,"2": None,"3": None,"4": None,"5": None,"6":3.09,"7": None,"8":4.39,"9":4.59,
                  "10": None,"11": None,"12":10.79,"13":11.09,"14":5.13,"15":6.89,"16":4.39,"17":6.67,"18":3.59,
                  "19":5.21,"20":4.29,"21":4.99,"22":3.89,"23":5.39,"24": None,"25": None,"26":6.09,"27":6.09,
                  "28":2.89,"29": None,"30":2.19,"31":3.79,"32":3.32,"33":3.39,"34":3.39,"35":3.39,"36":6.29,
                  "37":6.29,"38":3.99,"39":2.89,"40":3.59,"41":4.99,"42":4.69,"43":2.89,"44":2.59,"45":2.19,
                  "46":2.99,"47":4.99,"48":2.99,"49": None},
         "Comp2":{"0":4.77,"1":13.66,"2": None,"3":11.38,"4":14.59,"5": None,"6": None,"7":4.99,"8":4.99,"9": None,"10": None,
                  "11": None,"12":9.845,"13":13.12,"14": None,"15":11.23,"16":4.67,"17": None,"18":3.82,"19":3.88,"20":3.48,
                  "21":5.7,"22": None, "23":7.69,"24":8.18,"25": None,"26":8.99,"27": None,"28":1.95,"29":4.87,"30":1.72,
                  "31":2.69,"32":2.82,"33": None,"34": None,"35":2.43,"36":3.98,"37":3.98,"38":3.28,"39": None,"40": None,
                  "41":4.39,"42":3.99,"43":1.97,"44": None,"45":1.72,"46":2.49,"47": None,"48":2.48,"49":6.39},
         "Comp3":{"0":6.6833333333,"1": None,"2": None,"3": None,"4": None,"5": None,"6":3.79,"7":4.12,"8":5.46,
                  "9": None,"10": None,"11": None,"12":13.57,"13":14.1233333333,"14":4.52,"15":10.035,"16":5.2066666667,
                  "17":6.23,"18":4.095,"19":6.4733333333,"20":5.3866666667,"21":5.025,"22":4.41,"23":7.525,
                  "24":8.38,"25": None,"26":7.42,"27":6.85,"28":3.21,"29": None,"30":2.3733333333,"31": None,"32":4.315,
                  "33": None,"34": None,"35": None,"36": None,"37": None,"38":4.7133333333,"39":3.495,"40":3.8833333333,
                  "41":5.73,"42":5.73,"43":3.495,"44": None,"45":2.575,"46":3.495,"47":5.79,"48":3.47,"49":8.21}}

df = pd.DataFrame(data)

对于每个品牌和每个comp [n],我希望得到平均商店价格超过comp [n]价格,其中comp [n]和商店价格都有价格。我尝试过类似的事情:

brand = df.groupby('brand')['store_price','Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5'].mean()

for comp in ['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']:
    brand[comp] = 1.0 * (brand['store_price']/brand[comp])

这显然不起作用,因为每个Comp [n]的平均值与StoreName的整个品牌的平均值进行比较。它必须是具有comp [n]和StoreName价格的商品的美元对美元比率,反映为store_price。

我在想我可能需要运行类似的东西:

for i in df.index:
    for comp in ['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']:
        df.loc[i, comp] = 1.0*(df.loc[i, 'store_price']/df.loc[i,comp])

 brand = df.groupby('brand')['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5'].mean().reset_index()

但是,我觉得必须有一种更智能的方法来切片数据,运行计算,然后将这些切片粘贴回数据帧。

2 个答案:

答案 0 :(得分:3)

我们试试这个:

comp_cols = ['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']   
df1 = df.set_index('brand')
brand = (1 / df1[comp_cols]).mul(df1['store_price'], axis=0)\
                          .groupby(level=0).mean().reset_index()

print(brand)

输出:

        brand     Comp1     Comp2     Comp3     Comp4     Comp5
0   Brand1       NaN  0.941300  0.671820  1.000000  0.668155
1   Brand2  0.950219  1.090807       NaN       NaN       NaN
2   Brand3       NaN  0.741483  0.753343  0.878689  0.682211
3   Brand4       NaN       NaN       NaN  0.816993       NaN
4   Brand5  0.356250       NaN       NaN       NaN       NaN
5   Brand6  0.646914  0.884963  0.822998  0.936476  0.856191
6   Brand7  0.688456  0.774648  0.602923       NaN       NaN
7   Brand8       NaN  1.122867  0.721023  0.848478  0.902602
8   Zuke's  0.780908  0.778390  0.704790  0.810380  0.772446
9  ƒBrand6  0.545962  1.091925  0.792189  0.996293  1.018957

答案 1 :(得分:3)

要避免嵌套循环,请考虑lambda中具有匿名apply函数的列中的update

df.update(df[['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']].\
     apply(lambda col: df['store_price'] / col, axis=0))

brand = df.groupby('brand')['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5'].mean().reset_index()