从时间段列创建图的关键错误

时间:2018-12-01 17:45:05

标签: python-3.x pandas numpy

我有下面的示例数据的代码和数据。在代码中,我创建了3个函数。第一个通过分类变量列表绘制连续变量的分布。第二个为数据中的时间段列表创建列。然后,第三个将它们组合起来,以一列时间周期列的形式绘制连续变量的分布图。我已经分别测试了前两个功能,它们按预期工作,但是当我尝试运行第三个功能时,出现以下关键错误。如果有人能看到问题所在或提出解决方法的建议,我将不胜感激。

样本数据:

  Unnamed: 0  call_history_id                            calllog_id  \
0       16358       1210746736  ca58d850-6fe6-4673-a049-ea4a2d8d7ecf   
1       16361       1210976828  c005329b-955d-4d88-98a5-1c47e6a1cb80   
2       16402       1217791595  050e9b83-54c2-4c87-abdd-32225c0d3189   
3       16471       1228495414  45705ed1-a8e2-4a15-8941-5b0a40b7d409   
4       27906       1245173592  04e56818-04a0-4704-ac86-31c31dac2370   

        call_id  connection_id  pbx_name    pbx_id  extension_number  \
0  1.509170e+12   1.509170e+12  sales8x8  sales8x8               595   
1  1.509170e+12   1.509170e+12  sales8x8  sales8x8               595   
2  1.509170e+12   1.509170e+12  sales8x8  sales8x8               595   
3  1.509170e+12   1.509170e+12  sales8x8  sales8x8               595   
4  1.509170e+12   1.509170e+12  sales8x8  sales8x8               595   

  extension_id   customer_id      address                 name  \
0          595  2.525100e+29  14086694428           Sun Basket   
1          595  2.525100e+29  13214371589          PEREZ,BRYAN   
2          595  2.525100e+29  14088566290          14088566290   
3          595  2.525100e+29   8059316676              Dialing   
4          595  2.525100e+29  12028071151  Implementation Team   

  start_timestamp direction  call_internal  call_missed  duration  \
0    1/8/18 19:49         I            0.0          0.0    4414.0   
1    1/8/18 20:09         I            0.0          0.0    8300.0   
2    1/9/18 20:31         I            0.0          0.0   14766.0   
3   1/11/18 17:16         I            0.0          0.0    1686.0   
4   1/15/18 22:55         I            0.0          0.0    3491.0   

  device_model  group_call group_name group_number           device_id  \
0  mediaserver         0.0          N            N  MasterSlaveService   
1  mediaserver         0.0          N            N  MasterSlaveService   
2  mediaserver         0.0          N            N  MasterSlaveService   
3  mediaserver         0.0          N            N  MasterSlaveService   
4  mediaserver         0.0          N            N  MasterSlaveService   

  history_event_state   created_time   updated_time group_type  
0                   A   1/8/18 19:49   1/8/18 19:49          N  
1                   A   1/8/18 20:09   1/8/18 20:09        NaN  
2                   A   1/9/18 20:31   1/9/18 20:31          N  
3                   A  1/11/18 17:16  1/11/18 17:16          N  
4                   A  1/15/18 22:55  1/15/18 22:55          N  

代码:

# plot all continuous variables by all categorical variables
# get distributions



def plotter(ca_col,co_col,d_df,p_typ='boxplot'):
    if ca_col is None:
        for i in cont_col:
            if p_typ=='boxplot':
                data_df.boxplot(column=i)
            elif p_typ=='hist':
                data_df.hist(column=i)
    elif ca_col!=None:    
        for j in ca_col:
            for i in cont_col:
                if p_typ=='boxplot':
                    data_df.boxplot(column=i,by=j)
                elif p_typ=='hist':
                    data_df.hist(column=i,by=j)


# Create column for specified time periods in data


def ts_periods(f_nm, d_list, d_df):
    t_df=d_df.copy()

    for i in d_list:
        if i=='year':
            t_df[f_nm+'_year']=pd.DatetimeIndex(t_df[f_nm]).year
        elif i=='month':
            t_df[f_nm+'_month']=pd.DatetimeIndex(t_df[f_nm]).month
        elif i=='weekday':
            t_df[f_nm+'_weekday']=pd.DatetimeIndex(t_df[f_nm]).weekday_name
        elif i=='week' in d_list:
            t_df[f_nm+'_week']=pd.DatetimeIndex(t_df[f_nm]).week
        elif i=='hour':
            t_df[f_nm+'_hour']=pd.DatetimeIndex(t_df[f_nm]).hour
        elif i=='minute':
            t_df[f_nm+'_minute']=pd.DatetimeIndex(t_df[f_nm]).minute
    return t_df


# Create distribution plots of continuous variables by time periods in data

def ts_plotter(ts_pdf, ts_plist, cont_list, plt_typ, tser):

    ts_cat=[tser+'_'+i for i in ts_plist]

    op_df=ts_periods(f_nm=tser, d_list=ts_plist, d_df=ts_pdf)

    plotter(ca_col=ts_cat,co_col=cont_list,d_df=op_df,p_typ=plt_typ)


ts_plotter(ts_pdf=data_df, ts_plist=['year','minute'], cont_list=['duration'], plt_typ='boxplot', tser='updated_time')

错误:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-14-ee29a16ef8de> in <module>()
----> 1 ts_plotter(ts_pdf=data_df, ts_plist=['year','minute'], cont_list=['duration'], plt_typ='boxplot', tser='updated_time')

<ipython-input-12-c7fab8e851b9> in ts_plotter(ts_pdf, ts_plist, cont_list, plt_typ, tser)
      5     op_df=ts_periods(f_nm=tser, d_list=ts_plist, d_df=ts_pdf)
      6 
----> 7     plotter(ca_col=ts_cat,co_col=cont_list,d_df=op_df,p_typ=plt_typ)

<ipython-input-7-f618189e8627> in plotter(ca_col, co_col, d_df, p_typ)
     15             for i in cont_col:
     16                 if p_typ=='boxplot':
---> 17                     data_df.boxplot(column=i,by=j)
     18                 elif p_typ=='hist':
     19                     data_df.hist(column=i,by=j)

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/plotting/_core.py in boxplot_frame(self, column, by, ax, fontsize, rot, grid, figsize, layout, return_type, **kwds)
   2255     ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize,
   2256                  grid=grid, rot=rot, figsize=figsize, layout=layout,
-> 2257                  return_type=return_type, **kwds)
   2258     plt.draw_if_interactive()
   2259     return ax

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/plotting/_core.py in boxplot(data, column, by, ax, fontsize, rot, grid, figsize, layout, return_type, **kwds)
   2224                                          by=by, grid=grid, figsize=figsize,
   2225                                          ax=ax, layout=layout,
-> 2226                                          return_type=return_type)
   2227     else:
   2228         if return_type is None:

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/plotting/_core.py in _grouped_plot_by_column(plotf, data, columns, by, numeric_only, grid, figsize, ax, layout, return_type, **kwargs)
   2664                             figsize=None, ax=None, layout=None,
   2665                             return_type=None, **kwargs):
-> 2666     grouped = data.groupby(by)
   2667     if columns is None:
   2668         if not isinstance(by, (list, tuple)):

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/core/generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs)
   6663         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
   6664                        sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 6665                        observed=observed, **kwargs)
   6666 
   6667     def asfreq(self, freq, method=None, how=None, normalize=False,

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in groupby(obj, by, **kwds)
   2150         raise TypeError('invalid type: %s' % type(obj))
   2151 
-> 2152     return klass(obj, by, **kwds)
   2153 
   2154 

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs)
    597                                                     sort=sort,
    598                                                     observed=observed,
--> 599                                                     mutated=self.mutated)
    600 
    601         self.obj = obj

~/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
   3289                 in_axis, name, level, gpr = False, None, gpr, None
   3290             else:
-> 3291                 raise KeyError(gpr)
   3292         elif isinstance(gpr, Grouper) and gpr.key is not None:
   3293             # Add key to exclusions

KeyError: 'updated_time_year'

0 个答案:

没有答案