Question

我在SAS中有一个面板数据集，看起来像这样：

/* Sample panel data: one row per person (id) and year (time). */
data have;
    input id time income;
    datalines;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
run;

我想为每个个体创建一个新列（命名为 income_<id>）：在属于该个体的所有时间段的行中，该列取其收入；在其他个体的行中，该列取 0。基本上我想要的结果是这样的：

/* Desired result: income_<id> holds the income on that person's rows and 0
   on everybody else's rows. */
data want;
    input id time income income_1 income_2 income_3;
    datalines;
1 2008 1000 1000 0 0
1 2009 900 900 0 0
1 2010 1100 1100 0 0
2 2008 600 0 600 0
2 2009 500 0 500 0
2 2010 400 0 400 0
3 2008 300 0 0 300
3 2009 350 0 0 350
3 2010 250 0 0 250
;
run;

谢谢！

Answer 1

直观的方法是使用宏。

Yunchao Tian 有一篇非常好的 SUGI 论文，讲解了如何完成这类任务（原帖在此处附有链接）。

我在这里为你改写了那段代码。我测试过，看起来运行正常。

/* Keep one row per id. The output is sorted by id, so its last row carries
   the largest id. */
proc sort data=have out=unique nodupkey;
    by id;
run;

/* Store the largest id in the macro variable NMAX.
   CALL SYMPUTX strips leading/trailing blanks and, unlike PUT(id, 3.),
   is not limited to three-digit ids. */
data _null_;
    set unique end=last;
    if last then call symputx('NMAX', id);
run;

/* Create M1 .. M&NMAX and initialise each to 0, so that ids which never
   occur in the data can be recognised and skipped later.
   NOTE(review): assumes ids are positive integers - confirm for real data. */
data _null_;
    do i = 1 to &NMAX;
        call symputx(cats('M', i), 0);
    end;
run;

/* For every id that actually occurs, overwrite M<id> with the id itself.
   Reading the de-duplicated table avoids one redundant call per repeated row. */
data _null_;
    set unique;
    call symputx(cats('M', id), id);
run;

/* Generate one IF/ELSE pair per id present in the data. An M<i> that is still
   0 marks an id that never occurred, so no column is generated for it. */
%macro getid;
    %do i = 1 %to &NMAX;
        %if &&M&i ne 0 %then %do;
            if id = &&M&i then income_&i = income;
            else income_&i = 0;
        %end;
    %end;
%mend getid;

/* Apply the statements generated by GETID to every row of HAVE. */
data want;
    set have;
    %getid
run;

/* List the result for inspection. */
proc print data=want;
run;

Answer 2

/* Capture the smallest and largest id as left-aligned text in the macro
   variables minId and maxId; they serve as array boundaries later.
   Only meaningful when id is numeric. */
proc sql noprint;
    select put(min(id), 16. -L),
           put(max(id), 16. -L)
        into :minId, :maxId
        from have;
quit;

/* Variant that sets the columns of all other ids to zero.
   Can be slow when there are many distinct ids, because every row loops over
   the full id range. */
data want1;
    set have;
    /* Explicit bounds make the subscript equal to the id value. With the
       default 1..n subscripts, arr_income[id] is out of range whenever the
       smallest id is not 1.
       NOTE(review): assumes ids are non-negative integers - confirm. */
    array arr_income[&minId:&maxId] income_&minId - income_&maxId;
    do i = &minId to &maxId;
        if i = id then arr_income[i] = income;
        else arr_income[i] = 0;
    end;
    /* The loop counter is not part of the desired output. */
    drop i;
run;

/* Variant that leaves the columns of all other ids missing instead of zero. */
data want2;
    set have;
    /* Explicit bounds make the subscript equal to the id value. With the
       default 1..n subscripts, arr_income[id] is out of range whenever the
       smallest id is not 1.
       NOTE(review): assumes ids are non-negative integers - confirm. */
    array arr_income[&minId:&maxId] income_&minId - income_&maxId;
    arr_income[id] = income;
run;

注意：语句 array arr_income income_&minId - income_&maxId; 会为 min 和 max 之间的每一个整数都创建变量 income_<i>，即使数据中并不存在对应的 id。

Answer 3

/* Sample panel data: one row per person (id) and year (time). */
data have;
    input id time income;
    datalines;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
run;

/* Count the distinct ids, then store each distinct id in its own macro
   variable id1, id2, ... */
proc sql;
    select count(distinct id)
        into :count
        from have;
    /* %left removes the leading blanks of &count so that the upper bound of
       the range resolves to a valid macro variable name. */
    select distinct id
        into :id1 - :id%left(&count)
        from have;
quit;

/* Write the first three captured ids to the log. */
%put &id1 &id2 &id3;

/* Show the SAS statements generated by macros in the log. */
options mprint;

/* Build HAVE2 from HAVE with one extra column per distinct id.
   Reads the macro variables count (number of distinct ids) and id1..id<count>
   (the id values). Column income_<i> belongs to the i-th distinct id: it holds
   the income on that id's rows and 0 on all other rows. */
%macro test;
    data have2;
        /* No BY statement: FIRST./LAST. are never used, and a BY statement
           would make the step fail when HAVE is not sorted by id and time. */
        set have;
        %do i = 1 %to &count;
            if id = &&id&i then income_&i = income;
            else income_&i = 0;
        %end;
    run;
%mend test;

/* Run the macro to create HAVE2, then list the result. */
%test

proc print data=have2;
run;

在SAS中创建新变量

3 个答案: