如何使用 loc 方法为 DataFrame 中的某一列赋值?我在下面贴出了示例代码,但运行时一直出现如下索引错误:
pandas.core.indexing.IndexingError:(slice(None,None,None), ['weight'])
import pandas as pd
# Survey responses in long format: four surveys of six questions each, so
# every column holds 24 values laid out survey by survey.
data = {
    "survey_id": [101] * 6 + [102] * 6 + [103] * 6 + [104] * 6,
    "person": ['Ty'] * 6 + ['Bo'] * 6 + ['Al'] * 6 + ['Bo'] * 6,
    # The same six questions (and their bucket / short label) repeat per survey.
    'question_num': ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6'] * 4,
    'question_buckets': (['performance'] * 4 + ['cost'] * 2) * 4,
    'question_short': ['timely', 'diversity', 'knowledge', 'transparency', 'budgeting', 'ROI'] * 4,
    'company': ['Opera'] * 6 + ['Firefox'] * 6 + ['Safari'] * 6 + ['Chrome'] * 6,
    # Ratings differ per survey, so they are listed one survey per row.
    "rating": [
        4, 5, 3, 3, 3, 3,
        4, 5, 5, 4, 5, 5,
        1, 4, 1, 2, 1, 2,
        1, 5, 1, 5, 1, 5,
    ],
    # Each question carries the same weight in every survey.
    'weight': [.12, .215, .2, .15, .135, .18] * 4,
}
df = pd.DataFrame(data)
# Split the survey rows by company and pull out the per-company frames.
CompanyGroup = df.groupby('company')
firefox= CompanyGroup.get_group('Firefox')
chrome=CompanyGroup.get_group('Chrome')
# Add a new 'weightsum' column, initialised to 0, on the Firefox rows.
firefox['weightsum'] = 0
# NOTE(review): this is the statement behind the IndexingError quoted in the
# question. firefox['weightsum'] selects a single column, and the
# [:, ['weight']] row/column pair is then applied to that one-dimensional
# selection -- the (slice(None,None,None), ['weight']) in the error message
# is exactly this indexer.
firefox['weightsum'].loc[:,['weight']] =1
答案 0 :(得分:1)
这样写肯定可以:
import pandas as pd
# Survey responses in long format: four surveys of six questions each, so
# every column holds 24 values laid out survey by survey.
data = {
    "survey_id": [101] * 6 + [102] * 6 + [103] * 6 + [104] * 6,
    "person": ['Ty'] * 6 + ['Bo'] * 6 + ['Al'] * 6 + ['Bo'] * 6,
    # The same six questions (and their bucket / short label) repeat per survey.
    'question_num': ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6'] * 4,
    'question_buckets': (['performance'] * 4 + ['cost'] * 2) * 4,
    'question_short': ['timely', 'diversity', 'knowledge', 'transparency', 'budgeting', 'ROI'] * 4,
    'company': ['Opera'] * 6 + ['Firefox'] * 6 + ['Safari'] * 6 + ['Chrome'] * 6,
    # Ratings differ per survey, so they are listed one survey per row.
    "rating": [
        4, 5, 3, 3, 3, 3,
        4, 5, 5, 4, 5, 5,
        1, 4, 1, 2, 1, 2,
        1, 5, 1, 5, 1, 5,
    ],
    # Each question carries the same weight in every survey.
    'weight': [.12, .215, .2, .15, .135, .18] * 4,
}
df = pd.DataFrame(data)
def CalcNewRatings(row):
    """Return the weighted rating for one survey row.

    Args:
        row: Mapping-like record (for example a DataFrame row) that exposes
            ``'rating'`` and ``'weight'`` entries.

    Returns:
        The product ``row['rating'] * row['weight']``.
    """
    return row['rating'] * row['weight']
# Group the survey rows by respondent; each person's rows are pulled out of
# this GroupBy by name further down.
PersonGroup = df.groupby('person')
def calc(name):
    """Add weighted-rating columns to one person's survey rows.

    Args:
        name: DataFrame with the rows to score. It must contain the
            ``'company'``, ``'rating'`` and ``'weight'`` columns.

    Returns:
        A new DataFrame (the input is left untouched) with four extra columns:
        ``'weightsum'`` (constant 1), ``'ratingtimesweight'``
        (``rating * weight``), ``'totalrating'`` (sum of
        ``'ratingtimesweight'`` within each company) and ``'finalgrade'``
        (``'totalrating'`` divided by ``'weightsum'``).
    """
    # Work on a copy: callers pass a slice obtained from get_group(), and
    # writing new columns into it mutates their data and can raise
    # SettingWithCopyWarning.
    scored = name.copy()
    scored['weightsum'] = 1
    # Column-wise product instead of a row-by-row apply().
    scored['ratingtimesweight'] = scored['rating'] * scored['weight']
    # One grouped transform replaces the per-company loop of masked sums.
    # NOTE(review): the earlier version first filled 'totalrating' with the
    # frame-wide sum and then overwrote it company by company; that initial
    # value never survived for rows with a company, so it is not reproduced.
    scored['totalrating'] = (
        scored.groupby('company')['ratingtimesweight'].transform('sum')
    )
    scored['finalgrade'] = scored['totalrating'].div(scored['weightsum'])
    return scored
# Score every person's rows and stack the results into a single frame.
# DataFrame.append was removed in pandas 2.0, so the per-person frames are
# collected first and concatenated once (this also avoids re-copying the
# accumulated frame on every iteration).
scored_frames = [calc(PersonGroup.get_group(perso)) for perso in PersonGroup.groups]
# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the append-based loop produced when there were no groups.
newData = pd.concat(scored_frames, ignore_index=True) if scored_frames else pd.DataFrame()
print(newData)
答案 1 :(得分:0)
我不确定您想要做什么,但如果您是想把所有权重都改为 1,可以考虑将最后一行改为 firefox.loc[:, 'weight'] = 1。
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html
答案 2 :(得分:0)
您可以这样做:
# Boolean mask selecting the Firefox rows; assigning through .loc creates the
# 'weightsum' column, and rows outside the mask are left as NaN.
firefox_rows = df['company'] == 'Firefox'
df.loc[firefox_rows, 'weightsum'] = 1
df
输出:
survey_id person question_num question_buckets question_short company rating weight weightsum
0 101 Ty Q1 performance timely Opera 4 0.120 NaN
1 101 Ty Q2 performance diversity Opera 5 0.215 NaN
2 101 Ty Q3 performance knowledge Opera 3 0.200 NaN
3 101 Ty Q4 performance transparency Opera 3 0.150 NaN
4 101 Ty Q5 cost budgeting Opera 3 0.135 NaN
5 101 Ty Q6 cost ROI Opera 3 0.180 NaN
6 102 Bo Q1 performance timely Firefox 4 0.120 1.0
7 102 Bo Q2 performance diversity Firefox 5 0.215 1.0
8 102 Bo Q3 performance knowledge Firefox 5 0.200 1.0
9 102 Bo Q4 performance transparency Firefox 4 0.150 1.0
10 102 Bo Q5 cost budgeting Firefox 5 0.135 1.0
11 102 Bo Q6 cost ROI Firefox 5 0.180 1.0
12 103 Al Q1 performance timely Safari 1 0.120 NaN
13 103 Al Q2 performance diversity Safari 4 0.215 NaN
14 103 Al Q3 performance knowledge Safari 1 0.200 NaN
15 103 Al Q4 performance transparency Safari 2 0.150 NaN
16 103 Al Q5 cost budgeting Safari 1 0.135 NaN
17 103 Al Q6 cost ROI Safari 2 0.180 NaN
18 104 Bo Q1 performance timely Chrome 1 0.120 NaN
19 104 Bo Q2 performance diversity Chrome 5 0.215 NaN
20 104 Bo Q3 performance knowledge Chrome 1 0.200 NaN
21 104 Bo Q4 performance transparency Chrome 5 0.150 NaN
22 104 Bo Q5 cost budgeting Chrome 1 0.135 NaN
23 104 Bo Q6 cost ROI Chrome 5 0.180 NaN