我有一个按// C/C++ standard library
#include <vector>
#include <iostream>
#include <cstdlib>
using namespace std;
/// Holds a single numeric value that other objects can read.
class A {
public:
    /// Returns the currently stored value.
    /// Marked const so it can also be called through a const A or const A*.
    double get_value(void) const {
        return value;
    }

private:
    // Default member initializer: without it a default-constructed A would
    // hold an indeterminate double and get_value() would be undefined behaviour.
    double value = 0.0;
};
// Forward declare A if split over files
/// Accumulates a running total from an A instance that it observes but does
/// not own.
class B {
public:
    /// Attaches this object to `class_a_to_assign`.
    ///
    /// The argument is taken by reference so the stored pointer refers to the
    /// caller's object. Taking it by value would store the address of a
    /// temporary copy that is destroyed when this function returns.
    /// The referenced A must outlive every later call to update_my_value().
    void assign_pointer(A& class_a_to_assign) {
        class_a = &class_a_to_assign;
    }

    /// Adds the attached A's current value to the running total.
    /// Does nothing if no A has been attached yet.
    void update_my_value(void) {
        if (class_a == nullptr) {
            return;
        }
        value_b += class_a->get_value();
    }

    /// Returns the running total (starts at 0.1).
    double get_value(void) const {
        return value_b;
    }

private:
    double value_b = 0.1;
    A* class_a = nullptr;  // non-owning; nullptr until assign_pointer() is called
};
int main() {
cout << "hello world" << endl;
// create 2 instances of B there could be thousands of these tho.
B b1;
B b2;
// create 1 instance of A
A a1;
// Now I want both instances of the B class to point to the one instance of A
b1.assign_pointer(a1);
b2.assign_pointer(a1);
// THen do stuff with B so that if any changes occur in A, then they can be automatically updated in class B through the pointer
b1.update_my_value();
b2.update_my_value();
cout << b1.get_value() << " and " << b2.get_value() << endl;
return 0;
}
分组的df。对于每个SELECT e.*
FROM employee e
JOIN (SELECT employee_id, COUNT(*) AS employee_tours
FROM employee_tour
GROUP BY employee_id) et ON e.employee_id = et.employee_id
CROSS JOIN (SELECT COUNT(*) AS all_tours
FROM tour) t
WHERE employee_tours = all_tours
组,我想返回列a
大于该组a
平均值的所有行。我尝试了以下方法:
a
这引发了一个ValueError:重复级别名称:“id”,分配给级别1,已经用于级别0。
我做错了什么?
答案 0 :(得分:3)
使用 transform 可以得到与原始 DataFrame 大小相同的 Series,性能比 apply 解决方案更好:
df = df[df['a'] > df.groupby("id")['a'].transform('mean')]
print (df)
a b c
id
2 5 4 3
2 6 3 2
1 7 2 3
3 8 1 0
3 9 0 5
<strong>详细</strong>:
print (df.groupby("id")['a'].transform('mean'))
id
1 4.75
1 4.75
1 4.75
3 3.50
3 3.50
1 4.75
1 4.75
1 4.75
1 4.75
1 4.75
Name: a, dtype: float64
在您的解决方案中,需要参数group_keys=False
以避免具有相同级别名称的MultiIndex
,因为索引名称中的id
:
df = df.groupby("id", group_keys=False).apply(lambda x: x[x.a > x.a.mean()])
如果先调用 reset_index(),则会同时得到名为 id 的列和名为 id 的索引级别,两者的值相同:
df = df.reset_index().groupby("id").apply(lambda x: x[x.a > x.a.mean()])
print (df)
id a b c
id
2 6 2 6 3 3
7 2 7 2 9
9 2 9 0 1
3 5 3 5 4 9
8 3 8 1 8
另一项测试 - 删除index name
- id
:
df = df.rename_axis(None)
print (df)
a b c
3 0 9 2
2 1 8 2
1 2 7 6
3 3 6 1
1 4 5 3
2 5 4 9
3 6 3 6
2 7 2 1
1 8 1 0
1 9 0 1
df = df.groupby(level=0).apply(lambda x: x[x.a > x.a.mean()])
print (df)
a b c
1 1 8 1 0
1 9 0 1
2 2 5 4 9
2 7 2 1
3 3 6 3 6