合并格式化和未格式化的数据集

时间:2013-11-18 17:26:28

标签: sas

我正在尝试合并下面的两个表,但其中一个表的标识符变量应用了格式(format),导致两个表中的标识符无法匹配。怎样才能基于格式化后的值进行合并?或者,在把 PROC TABULATE 的结果输出到数据集时,怎样才能只得到聚合后的原始值,而不是附带了格式的原始值?

/* Build table FIRST: one row per (eventtime, node) with a stake and a misc value.
   Here node already holds the grouped labels such as 'LW/WL'. */
proc sql;
    create table first
        (eventtime num,
         node      char(100),
         stake     num,
         misc      num);

    insert into first (eventtime, node, stake, misc)
        values (1, '',                              10,  59867984)
        values (2, 'L',                             2,   349587)
        values (2, 'W',                             30,  345345)
        values (3, 'LL',                            40,  345345)
        values (3, 'LW/WL',                         50,  145345)
        values (3, 'WW',                            60,  3245234)
        values (4, 'LLL',                           70,  675)
        values (4, 'LLW/LWL/LWW',                   80,  2342546)
        values (4, 'WLL/WLW/WWL',                   90,  2345)
        values (4, 'WWW',                           100, 2342567)
        values (5, 'LLLL',                          110, 34534534)
        values (5, 'LLLW/LLWL/LLWW/LWLL',           120, 8767)
        values (5, 'LWLW/LWWL/LWWW/WLLL/WLLW/WLWL', 130, 1342345)
        values (5, 'WLWW/WWLL/WWLW/WWWL',           140, 467876)
        values (5, 'WWWW',                          150, 1345)
    ;
quit;

/* Build table SECOND: one row per (eventtime, individual node) with a stake.
   Here node holds the ungrouped sequences ('LW', 'WL', ...). */
proc sql;
    create table second
        (eventtime num,
         node      char(29),
         stake     num);

    insert into second (eventtime, node, stake)
        values (1, '',     11)
        values (2, 'L',    21)
        values (2, 'W',    31)
        values (3, 'LL',   41)
        values (3, 'LW',   51)
        values (3, 'WL',   51)
        values (3, 'WW',   61)
        values (4, 'LLL',  71)
        values (4, 'LLW',  81)
        values (4, 'LWL',  81)
        values (4, 'LWW',  81)
        values (4, 'WLL',  91)
        values (4, 'WLW',  91)
        values (4, 'WWL',  91)
        values (4, 'WWW',  101)
        values (5, 'LLLL', 111)
        values (5, 'LLLW', 121)
        values (5, 'LLWL', 121)
        values (5, 'LLWW', 121)
        values (5, 'LWLL', 121)
        values (5, 'LWLW', 131)
        values (5, 'LWWL', 131)
        values (5, 'LWWW', 131)
        values (5, 'WLLL', 131)
        values (5, 'WLLW', 131)
        values (5, 'WLWL', 131)
        values (5, 'WLWW', 141)
        values (5, 'WWLL', 141)
        values (5, 'WWLW', 141)
        values (5, 'WWWL', 141)
        values (5, 'WWWW', 151)
    ;
quit;

/* Character format $node_group.: maps individual node sequences onto the
   grouped labels used in table FIRST. Values that are not listed here
   (e.g. 'L', 'WW') have no mapping defined in this format. */
proc format library=work;
    value $node_group (notsorted)
        'LW', 'WL'
            = 'LW/WL'
        'LLW', 'LWL', 'LWW'
            = 'LLW/LWL/LWW'
        'WLL', 'WLW', 'WWL'
            = 'WLL/WLW/WWL'
        'LLLW', 'LLWL', 'LLWW', 'LWLL'
            = 'LLLW/LLWL/LLWW/LWLL'
        'LWLW', 'LWWL', 'LWWW', 'WLLL', 'WLLW', 'WLWL'
            = 'LWLW/LWWL/LWWW/WLLL/WLLW/WLWL'
        'WLWW', 'WWLL', 'WWLW', 'WWWL'
            = 'WLWW/WWLL/WWLW/WWWL'
    ;
run;

/* Sum STAKE from SECOND by eventTime and node and write the aggregated cells
   to CROSSTABOUTPUT. The $node_group. format is applied to the class
   variable node, so the table is built on the grouped node labels. */
proc tabulate data=second out=crosstaboutput missing;
    /* The classdata=foo option was commented out in the original step. */
    title ' ';
    class eventTime node;
    var stake;
    format node $node_group.;
    /* NOTE(review): the label "B6et" differs from "Bet" used in the
       disabled table statement below; possibly a typo, confirm. */
    table eventTime="B6et"*node="Node",
          stake="Sumstake"*SUM;
    /* Disabled alternative table definition, kept for reference:
       table eventTime="Bet"*node="Node",stake="Stake"*(N Median*f=10.2); */
run;

/* Attempt 1: apply CROSSTABOUTPUT as transactions to FIRST, matched on node,
   and keep only observations present in both inputs (H1 and H2).
   Author's observation: only picks up those 'nodes' upon which no format
   has been applied.
   NOTE(review): presumably the BY match compares the stored (unformatted)
   node values of CROSSTABOUTPUT, which would explain why the grouped labels
   in FIRST never match; confirm.
   NOTE(review): UPDATE with BY expects both inputs ordered by node; no sort
   is visible before this step; confirm. */
data third;
  update first(in=H1) crosstaboutput(in=H2);
  by node;
  if H1 and H2;
run;
/* Print the most recently created data set. */
proc print;run;

/* Attempt 2: sort both tables by node, then match-merge them on node.
   Author's observation: this doesn't work at all.
   NOTE(review): drop=_: removes every variable whose name starts with an
   underscore; with no OUT= the sort replaces CROSSTABOUTPUT itself; confirm
   that this is intended.
   NOTE(review): rename=() has an empty rename list; verify that SAS accepts
   it, or whether it is the reason the step fails. */
proc sort data=first; by node; run;
proc sort data=crosstaboutput(drop=_: rename=()); by node; run;
data third;
  merge first crosstaboutput;
  by node;
run;
/* Print the most recently created data set. */
proc print;run;

1 个答案:

答案 0 :(得分:1)

/* Materialise the formatted node value as a real character variable so that
   it can be used as a merge key against FIRST. */
data crosstaboutput_fmt;
    /* Keep the stored (unformatted) value under a new name. */
    set crosstaboutput (rename=(node=node_orig));
    /* 29 characters is the length of the longest $node_group. label and the
       declared length of node in table SECOND. */
    length node $29;
    /* PUT() returns the formatted text, e.g. 'LW' becomes 'LW/WL'. */
    node = put(node_orig, $node_group.);
run;

先将原始的、未格式化的 node 变量重命名为 node_orig,然后创建一个新的 node 变量,用来保存原变量经 $node_group. 格式化后的值:

/* Declare the new key wide enough for the longest grouped label (29 chars). */
length node $29;
/* Store the formatted value of node_orig as plain text in node. */
node = put(node_orig, $node_group.);