我在SAS中有一个面板数据集,看起来像这样:
/* Sample panel data: one row per individual (id) and year (time). */
data have;
  input id time income;   /* list input: three blank-delimited numeric fields */
  datalines;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
run;
对于每个个体,我想创建一个新列(命名为 income_<id>),该列在该个体的所有时间段内取其收入,而在其他个体的行上取零。基本上我想要的是这个:
/* Desired result: income_<id> holds income on that individual's rows and 0 on all others. */
data want;
  input id time income income_1-income_3;   /* numbered range list = income_1 income_2 income_3 */
  datalines;
1 2008 1000 1000 0 0
1 2009 900 900 0 0
1 2010 1100 1100 0 0
2 2008 600 0 600 0
2 2009 500 0 500 0
2 2010 400 0 400 0
3 2008 300 0 0 300
3 2009 350 0 0 350
3 2010 250 0 0 250
;
run;
谢谢
答案 0 :(得分:1)
直观的方法是使用宏。
Yunchao Tian 有一篇非常好的 SUGI 论文,解释了如何完成这项任务(见原文中的链接)。
我在这里为你改写了代码。经过测试,它似乎可以正常工作。
/*
 * Build one column income_<id> per distinct id found in HAVE:
 * income_<id> = income on rows belonging to that id, 0 on every other row.
 * Assumes id is a positive integer (it is used as a %DO loop bound and as a
 * suffix of macro-variable and column names).
 * Reads:  WORK.HAVE (id, income)
 * Writes: WORK.UNIQUE (scratch, one row per id), WORK.WANT,
 *         global macro variables NMAX and M1..M&NMAX.
 */

/* one row per distinct id, sorted ascending */
proc sort data=have out=unique nodupkey;
  by id;
run;

/* default so the loops below are simply skipped when HAVE is empty */
%let NMAX = 0;

/* assign the largest value of id to the macro variable NMAX */
/* CALL SYMPUTX trims blanks and is not limited to 3 digits like PUT(id, 3.) */
data _null_;
  set unique end=last;
  if last then call symputx('NMAX', id);
run;

/* create all macro variables M1..M&NMAX and assign value 0 (= "id not present") */
data _null_;
  do i = 1 to &NMAX;
    call symputx(cats('M', i), 0);
  end;
run;

/* assign the value of id to the corresponding macro variable */
/* UNIQUE already holds each id exactly once, so HAVE need not be rescanned */
data _null_;
  set unique;
  call symputx(cats('M', id), id);
run;

/* macro to create code to set col to income or zero */
%MACRO GETID;
  %LOCAL I;   /* keep the loop index from clobbering a caller's &I */
  %DO I = 1 %TO &NMAX;
    /* M&I is still 0 when no row carries id = &I: emit nothing for it */
    %IF &&M&I NE 0 %THEN %DO;
      IF ID = &&M&I THEN income_&I = income;
      ELSE income_&I = 0;
    %END;
  %END;
%MEND GETID;

/* Execute the macro */
DATA want;
  SET have;
  %GETID
RUN;

PROC PRINT DATA=want;
RUN;
答案 1 :(得分:0)
/*
 * Array-based variant: creates income_<minId> .. income_<maxId> in one pass.
 * Assumes id is a non-negative integer. Columns are created for every integer
 * between min(id) and max(id), including values that never occur as an id.
 * Reads:  WORK.HAVE (id, income)
 * Writes: WORK.WANT1, WORK.WANT2, macro variables minId and maxId.
 */

/* find min and max id for array boundaries, if ID is numeric */
proc sql noprint;
  select put(min(id), 16. -L), put(max(id), 16. -L)
    into :minId, :maxId
    from have
  ;
quit;

/* INTO keeps the trailing blanks of the 16-wide PUT result; %LET strips them */
%let minId = &minId;
%let maxId = &maxId;

/* with zero-ing the other variables, could be slow if lots of distinct IDs */
data want1;
  set have;
  /* explicit lower:upper bounds so the subscript IS the id value;            */
  /* a default 1-based array would be mis-indexed whenever min(id) is not 1   */
  array arr_income[&minId:&maxId] income_&minId - income_&maxId;
  do i = &minId to &maxId;
    if i = id then arr_income[i] = income;
    else arr_income[i] = 0;
  end;
  drop i;   /* loop counter is not part of the result */
run;

/* without zero-ing the other variables (they stay missing) */
data want2;
  set have;
  array arr_income[&minId:&maxId] income_&minId - income_&maxId;
  arr_income[id] = income;
run;
注意:array 语句中的变量列表 income_&minId - income_&maxId
会为 min 和 max 之间的所有数字创建变量 income_<i>,
即使其中某些 id 在数据中并不存在。
答案 2 :(得分:0)
/* Sample panel data: one row per individual (id) and year (time). */
DATA have;
INPUT id time income;
CARDS;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
RUN;

/* Collect the distinct ids into macro variables id1..id&count. */
/* NOPRINT: the queries only feed macro variables, no listing output is needed. */
proc sql noprint;
  select count(distinct id) into :count from have;
  /* INTO right-aligns the number; re-assigning with %LET strips the blanks */
  %let count = &count;
  select distinct id into :id1 - :id&count from have;
quit;

%put NOTE: &count distinct id value(s) found.;

options mprint;

/*
 * Macro TEST: writes WORK.HAVE2 = HAVE plus columns income_1..income_&count.
 * income_<k> carries income on the rows of the k-th smallest id and 0 elsewhere
 * (columns are numbered by rank of the id, not by the id value itself).
 * No BY statement: nothing uses FIRST./LAST., and a BY would only make the
 * step fail when HAVE is not sorted.
 */
%macro test;
  %local i;   /* keep the loop index from clobbering a caller's &i */
  data have2;
    set have;
    %do i = 1 %to &count;
      %put NOTE: id&i = &&id&i;   /* log every id instead of a hard-coded list of three */
      if id = &&id&i then income_&i = income;
      else income_&i = 0;
    %end;
  run;
%mend test;
%test;

proc print data=have2;
run;