数据转换使用Proc Transpose或更简单的程序

时间:2017-05-17 02:59:26

标签: sql sas

我有一个数据集:

Period    Store   Item    feature_1       feature_2
 JAN        A      a1        3               4
 JAN        A      a2        4               9
 JAN        A      a3        2               1
 JAN        A      a4        4               9
 FEB        A      a2        4               9
 JAN        B      a2        3               1
 FEB        B      b2        4               9 
.....

我想获取数据集:

 Period    Store   a1_feature_1    a1_feature_2      a2_feature_1      a2_feature_2....
 JAN        A         3               4                   4                9
 FEB        A         .               .                   4                9
 JAN        B         .               .                   3                1

其中,最终数据集中的每条观测对应一个时段(Period)与一家门店(Store)的组合,并把该门店下每个商品(Item)的所有特征合并到同一条观测中。

我最初的猜测是尝试首先使用宏来创建变量a1_feature_1,a1_feature_2,a2_feature_1,a2_feature_2 ....

然后使用 proc sql 的 group by,按 Store 和 Period 分组把数据折叠成一行。

我想知道是否可以使用proc transpose,sql来完成,还是会有其他更简单的步骤来转换这些数据?

2 个答案:

答案 0 :(得分:1)

以下是执行此操作的一种方法:

/* Sample input: one row per Period x Store x Item with two numeric features. */
data have;
  /* List input: the three leading fields are character, the last two numeric. */
  input Period $ Store $ Item $ feature_1 feature_2;
  datalines;
  JAN        A      a1        3               4
  JAN        A      a2        4               9
  JAN        A      a3        2               1
  JAN        A      a4        4               9
  FEB        A      a2        4               9
  JAN        B      a2        3               1
  FEB        B      b2        4               9
;
run;

/* Build two space-separated lists of wide-column names, one list per feature
   (a1_feature_1 a2_feature_1 ... and a1_feature_2 a2_feature_2 ...).
   DISTINCT keeps one name pair per Item value. Both lists are filled from the
   same result rows, so position k in each list refers to the same Item.
   Note: the select list must not end with a comma before INTO. */
proc sql noprint;
  select distinct cats(item, '_feature_1'),
                  cats(item, '_feature_2')
    into :item_list1 separated by ' ',
         :item_list2 separated by ' '
    from have;
quit;

/* Collapse HAVE to one row per Store x Period, spreading each Item's
   feature_1 / feature_2 into the columns named in &item_list1 / &item_list2. */
data want;
  /* DOW loop: read every row of the current Store/Period group before the
     implicit OUTPUT at the bottom of the step writes a single wide row. */
  do until(last.period);
    set have;
    /* NOTSORTED: groups only need to be contiguous, not in sorted order. */
    by store period notsorted;
    array f1[*] &item_list1;
    array f2[*] &item_list2;
    do i = 1 to dim(f1);
      /* Match on "<item>_" rather than "<item>" so that item a1 cannot
         also hit columns that belong to a10, a11, ... */
      if vname(f1[i]) eq: cats(item, '_') then do;
        f1[i] = feature_1;
        f2[i] = feature_2;
        leave; /* at most one column pair belongs to this item */
      end;
    end;
  end;
  /* Item only holds the value from the last row of the group, so drop it
     together with the working variables. */
  drop i item feature_1 feature_2;
run;

注意:这种方法得到的列顺序与问题中所示的不同,但如果需要,可以通过一些额外的逻辑轻松调整。此外,用于定义数组的宏变量长度有限(最多 65,534 个字符),因此只能容纳几千个项目对应的变量名。

答案 1 :(得分:0)

您还可以把所有 feature_ 变量放入一个列表中,针对每个变量分别转置数据并以该变量名作为后缀来命名新列,最后把各个转置结果合并在一起。使用此方法,您无需手动输入所有 feature_ 变量,因为 proc sql 步骤会自动为您生成这个列表:

/* Sample input: one row per Period x Store x Item with two numeric features. */
data test;
    /* Declare types and lengths up front: three $5 character keys, two numerics. */
    length Period Store Item $5
           feature_1 feature_2 8;
    input Period Store Item feature_1 feature_2;
    datalines;
 JAN        A      a1        3               4
 JAN        A      a2        4               9
 JAN        A      a3        2               1
 JAN        A      a4        4               9
 FEB        A      a2        4               9
 JAN        B      a2        3               1
 FEB        B      b2        4               9
;
run;

/* BY-group processing in the later transpose/merge steps needs this order. */
proc sort data=test;
    by Period Store;
run;

** how many feature_ vars do I have? **;

/* Start from an empty list: INTO does not assign the macro variable when the
   query returns no rows, which would otherwise leave it undefined or stale. */
%let feature_list = ;

proc sql noprint;
    /* Column names of WORK.TEST that start with "feature_" in any letter case.
       The underscore is escaped because it is a LIKE wildcard. */
    create table features as
    select NAME
    from dictionary.columns
    where libname="WORK" and memname="TEST"
      and upcase(NAME) like 'FEATURE^_%' escape '^';

    ** put them into a list to loop over **;
    select NAME
    into: feature_list separated by " "
    from features;
quit;

/* Echo the resulting list to the log for a quick visual check. */
%put &feature_list.;

** transpose data using each feature_ variable then merge when finished **;

/* loop_over: widen WORK.TEST once per feature variable and merge the results.
     feature_list - space-separated names of the variables to transpose.
   For each name F it writes TRANS_F (one row per PERIOD x STORE, one column
   per ITEM value, named <item>_F), then merges those datasets into MERGED.
   TEST must already be sorted by PERIOD STORE. */
%MACRO loop_over(feature_list);
    /* Keep working variables out of the caller's symbol table. */
    %local i feature trans_list;

    /* Nothing to transpose: stop before COUNTW is called with an empty argument. */
    %if %length(&feature_list.) = 0 %then %do;
        %put WARNING: loop_over was called with an empty feature list - nothing to do.;
        %return;
    %end;

    %do i = 1 %to %sysfunc(countw(&feature_list., %str( )));
        %let feature = %scan(&feature_list., &i., %str( ));

        proc transpose data = test out=trans_&feature.(drop=_NAME_) SUFFIX=_&feature.;
            by PERIOD STORE;
            id ITEM;
            var &feature.;
        run;

        /* Remember exactly which datasets this call produced. */
        %let trans_list = &trans_list. trans_&feature.;
    %end;

    /* Merge only the datasets created above. A trans_: wildcard would also
       pick up any stale TRANS_* dataset left in WORK by an earlier run. */
    data merged;
        merge &trans_list.;
        by PERIOD STORE;
    run;
%MEND loop_over;

%loop_over(&feature_list.);