创建一个pandas数据框,其中包含按小时划分的骑行次数以及工作日和周末的用户类型

时间:2017-09-07 05:21:50

标签: python pandas dataframe

创建一个 pandas DataFrame,按小时和用户类型分别统计工作日与周末的骑行次数。使用 starttime 来确定每次骑行的时间。starttime 来自以下 CSV 文件:https://drive.google.com/file/d/0B4KXs5bh3CmPWXJkQWhkbzI0WEE/view?usp=sharing 数据必须采用如下形式(见图片):

FirstOrDefault(); 

这是我计算整个星期(星期日)的小时数的代码。 我搜索了各种帖子并找到了

# Asker's attempt: count rides per start hour, separately for each user type,
# then place the two count columns side by side. Relies on `rides` (the trip
# DataFrame) and `pd` being defined earlier. No weekday/weekend split is made.

# --- "Customer" rides ---
# One-column frame holding the raw 'starttime' values under the name 'Customer'.
df = pd.DataFrame({'Customer':rides['starttime']})
# Parse the start times as datetimes; note this adds a 'Customer' column to `rides`.
rides['Customer'] = pd.to_datetime(df['Customer'])
# Hour of day taken from each parsed start time.
df['User Type Hour'] = rides['Customer'].dt.hour
# Keep rows whose usertype is "Customer", then count non-null entries per hour.
df2=df[rides['usertype']=="Customer"].groupby('User Type Hour').count()
# Bare expression: shows the frame when run in a notebook/REPL.
df2
# --- "Subscriber" rides: same steps, column named 'Subscriber' ---
df5 = pd.DataFrame({'Subscriber':rides['starttime']})
rides['Subscriber'] = pd.to_datetime(df5['Subscriber'])
df5['User Type Hour'] = rides['Subscriber'].dt.hour
dfe=df5[rides['usertype']=="Subscriber"].groupby('User Type Hour').count()
dfe
#c= df2.style.set_table_styles([dict(selector="th",props=[('max-width', '100px')])])
frames=[df2,dfe]
# Concatenate the two per-hour count frames column-wise; join='inner' keeps
# only the hours present in both frames.
result=pd.concat(frames, axis=1, join='inner')
result

但没有得到结果。 小尺寸的CSV [文件链接] [2]

1 个答案:

答案 0 :(得分:0)

您可以使用:

  • 在 read_csv 中添加参数 parse_dates,将列转换为 datetime

  • 使用 numpy.where 创建新系列 types(区分周末与工作日)

  • 使用 dt.hour 创建新系列 hours

  • 按 types、hours 和 usertype 进行 groupby,用 size 汇总并用 unstack 重塑

df = pd.read_csv('201507-citibike-tripdata.csv', parse_dates=[1,2])
types = np.where(df['starttime'].dt.dayofweek >= 5, 'Weekends', 'Workdays')
hours = df['starttime'].dt.hour
result = df.groupby([types, hours, 'usertype']).size().unstack()
print (result)


usertype            Customer  Subscriber
         starttime                      
Weekends 0              1079        3184
         1               609        2192
         2               429        1410
         ...
         21             2411        6207
         22             2192        5083
         23             1463        3555
Workdays 0              1385        6075
         1               768        2850
         2               442        1472
         ...
         23             2611       12607
# Flatten the grouped table into ordinary columns.
# The source must be `result` (the weekday-type / hour / user-type table),
# not the raw trip frame `df`: only `result` has the unnamed first index level
# ('level_0' after reset_index) and the 'starttime' hour level renamed below.
df = (
    result.reset_index()
    # Drop the leftover 'usertype' label on the columns axis. `axis` is passed
    # by keyword because pandas >= 2.0 accepts only `mapper` positionally.
    .rename_axis(None, axis=1)
    .rename(columns={'level_0': 'type', 'starttime': 'User Type Hour'})
)
print(df)

        type  User Type Hour  Customer  Subscriber
0   Weekends               0      1079        3184
1   Weekends               1       609        2192
...
23  Weekends              23      1463        3555
24  Workdays               0      1385        6075
25  Workdays               1       768        2850
...
46  Workdays              22      3311       19137
47  Workdays              23      2611       12607

一些数据清理:

如果需要在 type 列中省略重复值:

df = df.reset_index() \
       .rename_axis(None, 1) \
       .rename(columns={'level_0':'type', 'starttime':'User Type Hour'})
df['type'] = df['type'].mask(df['type'].duplicated(), '')
print (df)

        type  User Type Hour  Customer  Subscriber
0   Weekends               0      1079        3184
1                          1       609        2192
2                          2       429        1410
...
22                        22      2192        5083
23                        23      1463        3555
24  Workdays               0      1385        6075
25                         1       768        2850
26                         2       442        1472
...
46                        22      3311       19137
47                        23      2611       12607

extension CALayer {
    /// Masks the layer with a rounded-rectangle shape so that only the
    /// requested corners are rounded.
    ///
    /// - Parameters:
    ///   - corners: The corners to round.
    ///   - radius: Radius applied as both the width and height of each rounded corner.
    ///   - withBounds: Rectangle used to build the rounded path; when `nil`,
    ///     the layer's own `bounds` is used.
    func round(corners: UIRectCorner, withRadius radius: CGFloat, withBounds: CGRect? = nil) {
        let rect = withBounds ?? bounds
        let cornerRadii = CGSize(width: radius, height: radius)
        let roundedPath = UIBezierPath(
            roundedRect: rect,
            byRoundingCorners: corners,
            cornerRadii: cornerRadii
        )

        // Install a shape layer carrying the rounded path as this layer's mask.
        let shapeLayer = CAShapeLayer()
        shapeLayer.path = roundedPath.cgPath
        self.mask = shapeLayer
    }
}