我有一个这样的数据框
class FirebaseData : ObservableObject {
@Published var posts = [Post]()
let postsCollection = Firestore.firestore().collection("Posts")
/// Creates the object and immediately calls `fetchPosts()` to start loading posts.
init() {
    fetchPosts()
}
//MARK: Fetch Data
/// Attaches a snapshot listener to `postsCollection` and mirrors each
/// reported document change into `posts`.
///
/// - Added documents are converted with `createPostFromDocument(document:)`
///   and appended.
/// - Modified documents replace the stored post whose `id` equals the
///   document's `documentID`.
/// - Removed documents drop every stored post with that `documentID`.
///
/// If the listener reports an error, it is printed and the snapshot is ignored.
private func fetchPosts() {
    // Capture `self` weakly: the listener closure is retained by Firestore for
    // as long as the listener is registered, so a strong capture would keep
    // this object alive indefinitely.
    postsCollection.addSnapshotListener { [weak self] (documentSnapshot, err) in
        guard let self = self else { return }

        if let err = err {
            print("Error fetching posts: \(err.localizedDescription)")
            return
        }
        // No error and no snapshot: nothing to apply (avoids a force-unwrap).
        guard let snapshot = documentSnapshot else { return }

        snapshot.documentChanges.forEach { diff in
            let documentID = diff.document.documentID

            switch diff.type {
            case .added:
                self.posts.append(self.createPostFromDocument(document: diff.document))

            case .modified:
                // Replace the matching post in place instead of rebuilding the array.
                if let index = self.posts.firstIndex(where: { $0.id == documentID }) {
                    self.posts[index] = self.createPostFromDocument(document: diff.document)
                }

            case .removed:
                // Removing by index while iterating over a pre-computed index
                // range runs past the end of the shrunken array; remove by
                // predicate instead.
                self.posts.removeAll { $0.id == documentID }

            @unknown default:
                // Change types introduced by a future SDK version are ignored.
                break
            }
        }
    }
}
可以使用以下代码制作
col1 col2 col3 col4
id Category
a blue 4 1 3 0
red 1 0 0 4
b red 0 1 8 5
我希望输出看起来像这样
df = pd.DataFrame({ 'id': ['a','a','b'],'Category': ['red','blue','red'], 'col1': [1,4,0], 'col2': [0,1,1],'col3' : [0,3,8], 'col4': [4,0,5]})
sum_df = df.groupby(['id','Category']).agg({'col1': 'sum', 'col2': 'sum','col3': 'sum', 'col4': 'sum'})
我希望每个 id red_col1 red_col2 red_col3 red_col4 blue_col1 blue_col2 blue_col3 blue_col4
0 a 1 0 0 4 4.0 1.0 3.0 0.0
1 b 0 1 8 5 NaN NaN NaN NaN
在一行中都是唯一的,并将值汇总到相应的列中。数据集中有1000个ID,类别也有1000个。如果某列没有与之关联的值,则该列应为空白,即 NaN。
答案 0 :(得分:2)
使用DataFrame.unstack
,然后重命名列:
new_df = df.unstack('Category')
new_df.columns = [f'{color}_{col}' for col, color in new_df.columns]
new_df=new_df.sort_index(axis=1).reset_index()
print(new_df)
id blue_col1 blue_col2 blue_col3 blue_col4 red_col1 red_col2 \
0 a 4.0 1.0 3.0 0.0 1.0 0.0
1 b NaN NaN NaN NaN 0.0 1.0
red_col3 red_col4
0 0.0 4.0
1 8.0 5.0
答案 1 :(得分:1)
IIUC
s=df.unstack().sort_index(level=1,axis=1)
s.columns=s.columns.map('{0[1]}_{0[0]}'.format)
s
Out[136]:
blue_col1 blue_col2 blue_col3 ... red_col2 red_col3 red_col4
id ...
a 4.0 1.0 3.0 ... 0.0 0.0 4.0
b NaN NaN NaN ... 1.0 8.0 5.0
[2 rows x 8 columns]
答案 2 :(得分:0)
使用 unstack
df = df.unstack()
然后您可以添加前缀
df = df.add_prefix(category'_')
这样就能解决问题。