数据集上的自定义分组

时间:2013-10-15 17:15:18

标签: sas

我有以下按变量 'previousnode'(节点)分组的数据。我想把其中一些节点合并成更大的组。有没有比下面的做法更优雅的方式:

    /* Create the empty sample table work.delete: three numeric columns and
       one 10-character text column holding the outcome path. */
    proc sql;
        create table delete (
            t0avgStakeRank  num,
            eventtime       num,
            previousnode    char(10),
            bettors         num
        );
    quit;

/* Load the sample rows into delete: for stake ranks 0 and 1, one row per
   event time and outcome path (previousnode is a string of 'L'/'W'
   characters, blank at event time 1). The bettors value halves at each
   successive event time: 1000, 500, 250, 125, 62.5. */
proc sql;
    insert into delete (t0avgStakeRank, eventtime, previousnode, bettors)
        /* ---- stake rank 0 ---- */
        values (0, 1, ' ',    1000)

        values (0, 2, 'L',    500)
        values (0, 2, 'W',    500)

        values (0, 3, 'LL',   250)
        values (0, 3, 'LW',   250)
        values (0, 3, 'WL',   250)
        values (0, 3, 'WW',   250)

        values (0, 4, 'LLL',  125)
        values (0, 4, 'LLW',  125)
        values (0, 4, 'LWL',  125)
        values (0, 4, 'LWW',  125)
        values (0, 4, 'WLL',  125)
        values (0, 4, 'WLW',  125)
        values (0, 4, 'WWL',  125)
        values (0, 4, 'WWW',  125)

        values (0, 5, 'LLLL', 62.5)
        values (0, 5, 'LLLW', 62.5)
        values (0, 5, 'LLWL', 62.5)
        values (0, 5, 'LLWW', 62.5)
        values (0, 5, 'LWLL', 62.5)
        values (0, 5, 'LWLW', 62.5)
        values (0, 5, 'LWWL', 62.5)
        values (0, 5, 'LWWW', 62.5)
        values (0, 5, 'WLLL', 62.5)
        values (0, 5, 'WLLW', 62.5)
        values (0, 5, 'WLWL', 62.5)
        values (0, 5, 'WLWW', 62.5)
        values (0, 5, 'WWLL', 62.5)
        values (0, 5, 'WWLW', 62.5)
        values (0, 5, 'WWWL', 62.5)
        values (0, 5, 'WWWW', 62.5)

        /* ---- stake rank 1 ---- */
        values (1, 1, '',     1000)

        values (1, 2, 'L',    500)
        values (1, 2, 'W',    500)

        values (1, 3, 'LL',   250)
        values (1, 3, 'LW',   250)
        values (1, 3, 'WL',   250)
        values (1, 3, 'WW',   250)

        values (1, 4, 'LLL',  125)
        values (1, 4, 'LLW',  125)
        values (1, 4, 'LWL',  125)
        values (1, 4, 'LWW',  125)
        values (1, 4, 'WLL',  125)
        values (1, 4, 'WLW',  125)
        values (1, 4, 'WWL',  125)
        values (1, 4, 'WWW',  125)

        values (1, 5, 'LLLL', 62.5)
        values (1, 5, 'LLLW', 62.5)
        values (1, 5, 'LLWL', 62.5)
        values (1, 5, 'LLWW', 62.5)
        values (1, 5, 'LWLL', 62.5)
        values (1, 5, 'LWLW', 62.5)
        values (1, 5, 'LWWL', 62.5)
        values (1, 5, 'LWWW', 62.5)
        values (1, 5, 'WLLL', 62.5)
        values (1, 5, 'WLLW', 62.5)
        values (1, 5, 'WLWL', 62.5)
        values (1, 5, 'WLWW', 62.5)
        values (1, 5, 'WWLL', 62.5)
        values (1, 5, 'WWLW', 62.5)
        values (1, 5, 'WWWL', 62.5)
        values (1, 5, 'WWWW', 62.5)
    ;
quit;

/* Order delete in place by stake rank.
   NOTE(review): the PROC TABULATE step below has no BY statement, so this
   sort does not look required for it - confirm before removing. */
proc sort data=delete;
    by t0AvgStakeRank;
run;

/* Print the sum of bettors for every stake rank / event time / node
   combination. MISSING keeps rows whose class value is blank (the
   event-time-1 rows have a blank previousnode). The blank title and blank
   labels suppress the default title and the class-variable headings. */
proc tabulate data=delete missing;
    title ' ';
    class t0avgstakerank eventTime previousnode;
    var bettors;
    table
        t0avgstakerank="" * eventTime=" " * (previousnode=" "),
        bettors * sum
    ;
run;

这是我目前的做法,这样就可以按合并后的分组对 bettors 汇总求和。

/*
 * Build table grouped: bettors totalled per stake rank, event time and
 * collapsed node group.
 *
 * Output columns (same names and order as before):
 *   t0avgstakerank, eventtime, previousnode_grouped, bettors_grp
 *
 * How it works:
 *   - The in-line view maps every previousnode value to its group label
 *     exactly once. Nodes that appear in no WHEN list keep their own value
 *     as the label.
 *   - The outer query sums bettors per label with GROUP BY, so each group's
 *     membership list is written a single time instead of three times, and
 *     no correlated subquery is evaluated per row.
 *
 * Behaviour notes:
 *   - eventtime is part of the grouping key. The earlier correlated
 *     subqueries summed over every event time within a stake rank; in the
 *     sample data each listed node occurs at one event time only, so the
 *     totals are identical.
 *   - Duplicate input rows for the same rank / event time / node are added
 *     together rather than collapsed by DISTINCT.
 *   - The step now ends with QUIT; the previous terminator was not a SAS
 *     statement, which left PROC SQL running after a syntax error.
 */
proc sql;
    create table grouped as
    select
        t0avgstakerank,
        eventtime,
        previousnode_grouped,
        sum(bettors) as bettors_grp
    from (
        select
            t0avgstakerank,
            eventtime,
            bettors,
            case
                when previousnode in ('LW', 'WL')
                    then "LW/WL"
                when previousnode in ('LLW', 'LWL', 'LWW')
                    then "LLW/LW*"
                when previousnode in ('WLL', 'WLW', 'WWL')
                    then "WL*/WWL"
                when previousnode in ('LLLW', 'LLWL', 'LLWW', 'LWLL')
                    then "LLLW/LLW*/LWLL"
                when previousnode in ('LWLW', 'LWWL', 'LWWW', 'WLLL', 'WLLW', 'WLWL')
                    then "LWLW/LWW*/WLL*/WLWL"
                when previousnode in ('WLWW', 'WWLL', 'WWLW', 'WWWL')
                    then "WLWW/WWL*/WWWL"
                else previousnode
            end as previousnode_grouped
        from delete
    ) as labelled
    group by
        t0avgstakerank,
        eventtime,
        previousnode_grouped
    /* Explicit ordering keeps the row order deterministic. */
    order by
        t0avgstakerank,
        eventtime,
        previousnode_grouped;
quit;

0 个答案:

没有答案