我有以下SQL查询,它会创建一个新表,按用户ID汇总每种访问类型的次数以及总访问次数。如何在Python中创建这样的数据框?
-- Build a per-user summary table: one row per user_id with a visit count
-- for each of visit types 1, 2 and 3, plus the total number of visits.
CREATE TABLE User_Visits_summary AS
SELECT
    user_id,
    -- Conditional aggregation: each matching row adds 1, every other row adds 0.
    SUM(CASE WHEN visit_type = 1 THEN 1 ELSE 0 END) AS Type_One_Counts,
    SUM(CASE WHEN visit_type = 2 THEN 1 ELSE 0 END) AS Type_Two_Counts,
    SUM(CASE WHEN visit_type = 3 THEN 1 ELSE 0 END) AS Type_Three_Counts,
    -- Counts every row of the group, whatever its visit_type.
    COUNT(*) AS Total_Visits
FROM user_visits
GROUP BY user_id;
答案 0 :(得分:0)
下面的代码应该会生成与SQL查询相同的表。请阅读代码中的注释,并在调试模式下逐行执行,以便更好地理解每行代码的作用。如需Pandas功能的实用指南,请参阅这份速查表(cheat sheet):
https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
import pandas as pd
# Example dataset: one row per visit.
user_visits = pd.DataFrame({'user_id'   :['A','A','A','A','A','B','B','B','B'],
                            'visit_type':[ 1,  1,  3,  3,  3,  2,  2,  2,  2] })

# visit_type value -> output column name, mirroring the three CASE branches
# of the SQL query.
VISIT_TYPE_COLUMNS = {1: 'Type_One_Counts',
                      2: 'Type_Two_Counts',
                      3: 'Type_Three_Counts'}


def summarize_user_visits(visits):
    """Summarise visits per user, reproducing the SQL GROUP BY query.

    Parameters
    ----------
    visits : pandas.DataFrame
        Must contain the columns 'user_id' and 'visit_type'; one row per visit.

    Returns
    -------
    pandas.DataFrame
        One row per user_id (sorted by user_id) with the columns
        'user_id', 'Type_One_Counts', 'Type_Two_Counts',
        'Type_Three_Counts' and 'Total_Visits'. All counts are integers.

    Notes
    -----
    Rows whose user_id is missing (NaN/None) are dropped by pandas' groupby
    default, whereas SQL would place them in a single NULL group.
    """
    flags = pd.DataFrame({'user_id': visits['user_id']})
    # Equivalent of `sum(case when visit_type = N then 1 else 0 end)`:
    # a 0/1 indicator per row that is summed per user below. Because every
    # column is created unconditionally, a visit type that never occurs
    # yields a column of zeros instead of a missing column or NaN.
    for visit_type, column in VISIT_TYPE_COLUMNS.items():
        flags[column] = (visits['visit_type'] == visit_type).astype(int)
    # Equivalent of `count(*)`: every row counts once, including rows whose
    # visit_type is missing or is not one of the three listed types.
    flags['Total_Visits'] = 1
    return flags.groupby('user_id', as_index=False).sum()


# Table ready
User_Visits_summary = summarize_user_visits(user_visits)
print(User_Visits_summary)
# ...too wide to paste...