在SAS中,如何转置表格以为列中的每个唯一值生成一个虚拟变量

时间:2018-12-11 23:22:48

标签: sas

使用SAS,我想对数据进行转置,使变量 Class 和 Subclass 的每个唯一值都按变量 ID 生成一个虚拟变量(每个 ID 对应一行)。

拥有:

ID        Class        Subclass         
-------------------------------   
ID1        1           1a          
ID1        1           1b           
ID1        1           1c           
ID1        2           2a

ID2        1           1a           
ID2        1           1b           
ID2        2           2a           
ID2        2           2b              
ID2        3           3a

ID3        1           1a                      
ID3        1           1d 
ID3        2           2a
ID3        3           3a           
ID3        3           3b  

想要:

ID    Class_1    Class_2    Class_3    Subclass_1a  ...    Subclass_3b         
----------------------------------------------------...---------------   
ID1   1          1          0          1            ...    0
ID2   1          1          1          1            ...    0
ID3   1          1          1          1            ...    0

我尝试在 PROC TRANSPOSE 的 ID 语句中使用变量 ID、Class 和 Subclass 来转置数据。但是,这样得到的变量名是 Class 和 Subclass 取值的各个唯一组合拼接而成的。另外,如果没有在 PROC TRANSPOSE 中指定 VAR 语句,这种方法也无法得到 0 和 1 的取值。

我是否需要先创建实际的虚拟变量,然后再对数据进行转置以获取需求表,还是有更直接的方法?

3 个答案:

答案 0 :(得分:1)

您也可以先用 SELECT DISTINCT 去重,再对每个变量分别使用 PROC TRANSPOSE,最后把结果合并回来。

  /* Sample input: one row per (ID, Class, Subclass) combination.
     All three variables are read as character values with list input. */
  data have;
    input ID $ Class $ Subclass $;
    datalines;
ID1 1 1a
ID1 1 1b
ID1 1 1c
ID1 2 2a
ID2 1 1a
ID2 1 1b
ID2 2 2a
ID2 2 2b
ID2 3 3a
ID3 1 1a
ID3 1 1d
ID3 2 2a
ID3 3 3a
ID3 3 3b
;

  /* One row per distinct (ID, Class) pair.
     ORDER BY makes the sort order required by the BY statement in the
     PROC TRANSPOSE below explicit, instead of relying on the ordering that
     SELECT DISTINCT happens to produce. */
  proc sql;
    create table want1 as
    select distinct id, class
    from have
    order by id, class;
  quit;

  /* Pivot the Class values into columns named class_<value>.
     Within each ID a column is non-missing only when that ID has the class. */
  proc transpose data=want1 out=want1a(drop=_name_) prefix=class_;
    by id;
    id class;
    var class;
  run;

   /* One row per distinct (ID, Subclass) pair.
      ORDER BY makes the sort order required by the BY statement in the
      PROC TRANSPOSE below explicit, instead of relying on the ordering that
      SELECT DISTINCT happens to produce. */
   proc sql;
     create table want2 as
     select distinct id, subclass
     from have
     order by id, subclass;
   quit;

   /* Pivot the Subclass values into columns named Subclass_<value>.
      Within each ID a column is non-missing only when that ID has the subclass. */
   proc transpose data=want2 out=want2a(drop=_name_) prefix=Subclass_;
     by id;
     id subclass;
     var subclass;
   run;

 /* Join the two transposed tables by ID and turn every class_* / subclass_*
    column into a character flag: "1" when the transposed cell holds a value,
    "0" when it is missing. */
 data want;
   merge want1a want2a;
   by id;
   array flags(*) class_: subclass_:;
   do _k = 1 to dim(flags);
     if not missing(flags(_k)) then flags(_k) = "1";
     else flags(_k) = "0";
   end;
   drop _k;
 run;

答案 1 :(得分:1)

看起来可以借助 PROC TRANSREG 先生成设计矩阵(即各个虚拟变量),然后再用 PROC SUMMARY 按 ID 进行汇总。

/* Read the sample table. The first two data lines are the column header and
   the dashed rule, so reading starts at line 3 (FIRSTOBS=3). */
data id;
   length ID $3 class 8 subclass $2;
   infile datalines firstobs=3;
   input ID class subclass;
   datalines;
ID        Class        Subclass
-------------------------------
ID1        1           1a
ID1        1           1b
ID1        1           1c
ID1        2           2a
ID2        1           1a
ID2        1           1b
ID2        2           2a
ID2        2           2b
ID2        3           3a
ID3        1           1a
ID3        1           1d
ID3        2           2a
ID3        3           3a
ID3        3           3b
;
run;

/* Show the input data. */
proc print data=id;
run;
/* Expand class and subclass into a design matrix: ZERO=NONE requests an
   indicator column for every level (no reference level). The original class
   and subclass columns are dropped from the output; ID is carried along. */
proc transreg data=id;
   model class(class subclass / zero=none);
   id id;
   output out=dummy(drop=class subclass) design;
run;

/* Show the row-level design matrix. */
proc print data=dummy;
run;
/* Collapse the row-level indicators to one row per ID: the maximum of each
   class*/subclass* indicator is 1 when the level occurs for that ID, else 0. */
proc summary data=dummy nway;
   class id;
   var class: subclass:;
   output out=want(drop=_type_) max=;
run;

/* Show the final one-row-per-ID table. */
proc print data=want;
run;

(原帖此处为输出结果的截图,图片未能保留。)

答案 2 :(得分:0)

下面是一种稍微取巧的代码生成方法:它使用哈希对象把每个取值映射到数组下标,而数组中的各个元素就是表示 <name>_<value> 是否存在的标志变量。

/* Sample input: one row per (ID, Class, Subclass) combination.
   Class is numeric; ID and Subclass are character.
   The data lines contain no blank separator lines (which would only be
   skipped through FLOWOVER and add log notes) and end with an explicit
   null statement instead of using RUN as the data terminator. */
data have;
  input ID $ Class Subclass $;
  datalines;
ID1 1 1a
ID1 1 1b
ID1 1 1c
ID1 2 2a
ID2 1 1a
ID2 1 1b
ID2 2 2a
ID2 2 2b
ID2 3 3a
ID3 1 1a
ID3 1 1d
ID3 2 2a
ID3 3 3a
ID3 3 3b
;
run;

/* Build value-to-index lookup tables. They are used both to construct the
   <name>_<value> variable names and to load the hash objects later on. */

/* A separate PROC SQL invocation is started for each table so that the
   monotonic() counter starts over for each one. */
proc sql ;
  create table map1 as
    select class,
           monotonic() as index
      from (select distinct class from have);
quit;

proc sql noprint;
  create table map2 as
    select subclass,
           monotonic() as index
      from (select distinct subclass from have);
quit;

/* Store the target variable names in macro variables, in index order, so
   they can be listed in ARRAY statements:
     map1vars = class_<value> names, map2vars = subclass_<value> names. */
proc sql noprint;
  select catx('_','class',class)
    into :map1vars separated by ' '
    from map1
    order by index;

  select catx('_','subclass',subclass)
    into :map2vars separated by ' '
    from map2
    order by index;
quit;

/* Build one output row per ID holding a 0/1 flag for every
   <variable>_<value> combination named in &map1vars and &map2vars.
   Requires HAVE to be grouped by ID and MAP1/MAP2 to hold the value-to-index
   mapping that the macro variables were built from. */
data want;
  if _n_ = 1 then do;
    if 0 then set map1 map2; /* never executes; only adds the hash key/data variables to the PDV */
    declare hash map1(dataset:'map1');
    declare hash map2(dataset:'map2');
    map1.defineKey('class');
    map1.defineData('index');
    map1.defineDone();
    map2.defineKey('subclass');
    map2.defineData('index');
    map2.defineDone();
  end;

  /* Read all rows of one ID before the implicit OUTPUT at the end of the
     step, so each ID produces exactly one row. The flag variables are not
     retained, so they start out missing for every new ID. */
  do until (last.id);
    set have;
    by id;
    array map1_ &map1vars; /* flags for the class_<value> variables */
    array map2_ &map2vars; /* flags for the subclass_<value> variables */

    /* The hash lookup on the current value loads INDEX, which is the
       position of the matching flag in the target array. */
    map1.find(); map1_[index] = 1;
    map2.find(); map2_[index] = 1;
  end;

  /* Flags never set for this ID are still missing: report them as 0 so the
     result is a true 0/1 dummy matrix. */
  do index = 1 to dim(map1_);
    map1_[index] = coalesce(map1_[index], 0);
  end;
  do index = 1 to dim(map2_);
    map2_[index] = coalesce(map2_[index], 0);
  end;

  keep id &map1vars &map2vars;
run;

PROC REPORT 可以把各个取值作为 ACROSS(横向)列显示,并给出每个组内各取值出现的次数。

/* Tabulate HAVE with one row per ID (GROUP) and one column per distinct
   Class and Subclass value (ACROSS). */
proc report data=have;
  column id class subclass;
  define id       / group;
  define class    / across;
  define subclass / across;
run;