如何通过SAS选择SCD-Type2中的不匹配列

时间:2018-09-27 13:31:38

标签: sas

我们有将近200张SCD Type 2表,每张表大约有250列。

例如:学生详细信息表

STUDENT_ID VALID_FROM_DT  VALID_TO_DT        NAME    CITY   CONTACT_NO  BRANCH  
   1        04-April-2018   10-April-2018    XYZ     Chennai  12345     CSE
   1       10-April-2018   31-DEC-2055       XYZ     MUMBAI   87777     CSE

我在寻找一种通用的解决方案:只输出取值发生了变化(即不匹配)的列。对于上面的例子,输出应为

输出:

STUDENT_ID VALID_FROM_DT  VALID_TO_DT         CITY   CONTACT_NO    
   1        04-April-2018  10-April-2018      Chennai  12345    
   1       10-April-2018   31-DEC-2055        MUMBAI   87777     

这样的解决方案可行吗?如果可行,我希望把它用于我的全部200张表。

2 个答案:

答案 0 :(得分:0)

维表数量很多,而且每张维表都有大量属性列,这是一项不小的工程。要得到通用的解决方案,您需要利用元数据信息获取库中的数据集列表,并检查这些数据集是否具有“SCD-2 特征”(例如,存在一个名称与数据集名称相似的ID列)。对于每张SCD-2表,您需要再次访问元数据,以获取非SCD列(即除记录ID、维ID和有效日期范围之外的所有列)。然后编写一个处理过程,遍历数据集,找出属性值与其上一个有效日期范围相比发生了变化的记录。

下面先生成一些SCD-2示例数据:维表名称是任意的,属性(即列)名称也是任意的。每个ID组内的某些列被强制为“静态”(取值保持不变),可以把它们看作您问题中那些没有发生变化的列。

%macro random_name(len=8);
  /*
   * Function-style macro: resolves to a string of &len characters.
   * Each character is BYTE(65 + 26 * RANUNI(123)), i.e. a code in the
   * range 65-90 (the letters A-Z on an ASCII host).
   *
   * len = number of characters to generate (default 8).
   */
  %local pos letters;
  %let pos = 1;
  %do %while (&pos <= &len);
    %let letters = &letters.%sysfunc(byte(%sysfunc(ranuni(123))*26+65));
    %let pos = %eval(&pos + 1);
  %end;
  &letters
%mend random_name;

%macro make_data(lib=WORK, N=40);
  /*
   * Generate &N sample SCD type-2 dimension tables in library &lib.
   *
   * Each table is named dimtable_<random 8 characters> and contains:
   *   rowid                      - running row number
   *   <table>_id                 - dimension id, incremented at the start of
   *                                each new id group
   *   valid_from_dt, valid_to_dt - consecutive validity date ranges
   *   nattr<i>_<random>          - 0 to 19 numeric attribute columns
   *   cattr<i>_<random>          - 0 to 19 character ($20) attribute columns
   *
   * Within an id group each attribute is flagged "static" with probability
   * 0.40; static attributes keep their value for the whole group, all other
   * attributes are re-drawn on every row.
   *
   * lib = target library (default WORK)
   * N   = number of tables to create (default 40)
   *
   * Requires the %random_name macro.
   */
  %local outcount i j out idvar;
  %local top cCount nCount namelen name;

  %do outcount = 1 %to &N;
    %let out = dimtable_%random_name();
    %let idvar = &out._id;

    /* Row count and numbers of numeric / character attributes. */
    %let top  = %sysevalf ( 1000 * %sysfunc(ranuni(123)), FLOOR);
    %let nCount = %sysevalf ( 20 * %sysfunc(ranuni(123)), FLOOR);
    %let cCount = %sysevalf ( 20 * %sysfunc(ranuni(123)), FLOOR);

    /* Random names for the numeric attribute columns. */
    %do i = 1 %to &nCount;
      %let namelen = %sysevalf(16 * %sysfunc(ranuni(123)), CEIL);
      %let name = ;
      %do j = 1 %to &namelen;
        %let name = &name.%sysfunc(byte(%sysfunc(ranuni(123))*26+65));
      %end;
      %local numvar&i;
      %let numvar&i = nattr&i._&name.;
    %end;

    /* Random names for the character attribute columns. */
    %do i = 1 %to &cCount;
      %let namelen = %sysevalf(16 * %sysfunc(ranuni(123)), CEIL);
      %let name = ;
      %do j = 1 %to &namelen;
        %let name = &name.%sysfunc(byte(%sysfunc(ranuni(123))*26+65));
      %end;
      %local chrvar&i;
      %let chrvar&i = cattr&i._&name.;
    %end;

    data &lib..&out;
      do rowid = 1 to &top;

        /* Start a new id group on the first row and then at random (p = 0.2). */
        if rowid = 1 or ranuni(123) > 0.8 then do;
          &idvar + 1;

          valid_from_dt = 0;
          valid_to_dt   = '01-jan-1970'd + floor(60 * ranuni(123));

          format valid: yymmdd10.;

          /* Declarations are emitted only when there is something to declare,
             so a table with zero numeric or zero character attributes still
             compiles. */
          %if &nCount or &cCount %then %do;
          attrib 
            %if &nCount %then %do; %do i = 1 %to &nCount; &&numvar&i %end; length=8 format=6. %end;
            %if &cCount %then %do; %do i = 1 %to &cCount; &&chrvar&i %end; length=$20 %end;
          ;
          %end;

          %if &nCount %then %do; array num %do i = 1 %to &nCount;  &&numvar&i %end; ; %end;
          %if &cCount %then %do; array chr %do i = 1 %to &cCount;  &&chrvar&i %end; ; %end;

          /* Per-group "static" flags; element 0 exists only so that the
             array bounds stay valid when a count is 0. */
          array staticN[0:&nCount] _temporary_;
          array staticC[0:&cCount] _temporary_;

          do _n_ = 1 to hbound(staticN); staticN(_n_) = ranuni(123) < 0.40; end;
          do _n_ = 1 to hbound(staticC); staticC(_n_) = ranuni(123) < 0.40; end;

          /* Initial attribute values for the new group. */
          %if &nCount %then %do;
          do _n_ = 1 to dim(num);
            num(_n_) = CEIL (1000 * ranuni(123));
          end;
          %end;
          %if &cCount %then %do;
          do _n_ = 1 to dim(chr);
            chr(_n_) = repeat(byte(65+26*ranuni(123)), 15 * ranuni(123));
          end;
          %end;
        end;

        /* The next validity range starts the day after the previous one ended. */
        valid_from_dt = valid_to_dt + 1;
        valid_to_dt   = valid_from_dt + ceil(60 * ranuni(123));

        /* Re-draw every attribute that is not static for this group. */
        %if &nCount %then %do;
        do _n_ = 1 to dim(num);
          if not staticN(_n_) then num(_n_) = CEIL (1000 * ranuni(123));
        end;
        %end;

        %if &cCount %then %do;
        do _n_ = 1 to dim(chr);
          if not staticC(_n_) then chr(_n_) = repeat(byte(65+26*ranuni(123)), 15 * ranuni(123));
        end;
        %end;

        output;
      end;
    run;
  %end;
%mend make_data;

/* Echo the SAS statements generated by macros to the log. */
options mprint;

/* Library in which the sample SCD-2 tables are created and later scanned. */
%let SCD2_LIB = WORK;

/* Start from a clean slate: KILL deletes every data set in the library. */
proc datasets library=&SCD2_LIB memtype=data kill nolist noprint;
run;
quit;

/* Create a single sample SCD-2 table. */
%make_data(lib=&SCD2_LIB, n=1)

编写一个通用的SCD-2检查宏:当属性值与上一个有效日期范围相比没有变化时,输出表中对应的位置为缺失值(空白)。这样的输出虽然不算精简,但视觉效果很好:可以直接看到“空白”,空白之处即表示信息没有变化。

该宏由另一段程序调用;那段程序负责发现SCD-2表,并确定哪些数值型和字符型变量是需要检查取值变化的属性。输出保存在名为 *_changes 的相应数据集中。

%macro scan_scd(data=, idvar=, nvars=, cvars=);
  /*
   * Scan one SCD type-2 table for attribute changes between consecutive
   * rows of the same id and write the result to &data._changes.
   *
   * data  = input table; it must contain rowid, &idvar, valid_from_dt and
   *         valid_to_dt and is read with BY &idvar, so it must be ordered
   *         by &idvar
   * idvar = name of the dimension id column
   * nvars = space-separated numeric attribute columns (may be empty)
   * cvars = space-separated character attribute columns (may be empty)
   *
   * For each attribute X the output gets a companion column X_was:
   *   - missing / blank on the first row of an id and on rows where X
   *     equals its value on the previous row,
   *   - the previous value of X on rows where X changed.
   * The helper columns ___nprv<i> / ___cprv<i> (previous-row values) are
   * also present in the output.
   */
  %local i nCount cCount;

  /* %SYSFUNC(COUNTW()) raises an error on an empty argument, so only count
     lists that actually contain something. */
  %let nCount = 0;
  %let cCount = 0;
  %if %length(&nvars) %then %let nCount = %sysfunc(countw(&nvars, %str( )));
  %if %length(&cvars) %then %let cCount = %sysfunc(countw(&cvars, %str( )));

  /* Without any attribute column the RENAME= and RETAIN lists below would
     be empty, which is not valid syntax - and there is nothing to compare. */
  %if %eval(&nCount + &cCount) = 0 %then %do;
    %put NOTE: scan_scd - no attribute columns supplied for &data, nothing to scan.;
    %return;
  %end;

  %do i = 1 %to &nCount; %local nvar&i; %let nvar&i = %scan(&nvars,&i,%str( )); %end;
  %do i = 1 %to &cCount; %local cvar&i; %let cvar&i = %scan(&cvars,&i,%str( )); %end;

  data &data._changes;
    /* Never executed; only places the key columns first in the output. */
    if 0 then set &data(keep=rowid &idvar valid_from_dt valid_to_dt);

    /* Positions every attribute next to its _was companion. */
    retain
        %do i = 1 %to &nCount; &&nvar&i &&nvar&i.._was %end;
        %do i = 1 %to &cCount; &&cvar&i &&cvar&i.._was %end;
    ;

    /* The two OBS=0 reads contribute no rows; they only define the helper
       and _was columns with the same type and length as the originals. */
    set 
      &data (obs=0 rename=(
        %do i = 1 %to &nCount; &&nvar&i = ___nprv&i %end; 
        %do i = 1 %to &cCount; &&cvar&i = ___cprv&i %end;
      ))
      &data (obs=0 rename=(
        %do i = 1 %to &nCount; &&nvar&i = &&nvar&i.._was %end; 
        %do i = 1 %to &cCount; &&cvar&i = &&cvar&i.._was %end;
      ))
      &data;

    by &idvar.;

    if first.&idvar. then do;
      /* First row of an id: there is no previous range to compare with. */
      %do i = 1 %to &nCount; &&nvar&i.._was = .; %end;
      %do i = 1 %to &cCount; &&cvar&i.._was = ''; %end;
    end;
    else do;
      /* Blank when unchanged, otherwise the value from the previous row. */
      %do i = 1 %to &nCount; &&nvar&i.._was = ifn (&&nvar&i = ___nprv&i, ., ___nprv&i); %end;
      %do i = 1 %to &cCount; &&cvar&i.._was = ifc (&&cvar&i = ___cprv&i,'', ___cprv&i); %end;
    end;

    /* Remember the current values for the comparison on the next row. */
    %do i = 1 %to &nCount; ___nprv&i = &&nvar&i; %end;
    %do i = 1 %to &cCount; ___cprv&i = &&cvar&i; %end;
  run;
%mend scan_scd;

scd-2发现和扫描调用

收集候选库中所有表的名称。

/* List every member of the candidate library. DICTIONARY.TABLES stores
   LIBNAME in upper case, so the macro variable is upcased before the
   comparison; a lower-case SCD2_LIB would otherwise match nothing. */
proc sql;
  create table scd_datasets as
  select libname, memname
  from dictionary.tables
  where libname = "%upcase(&SCD2_LIB)"
  ;
quit;

使用 filename mprint 和 options mfile,可以把宏生成的代码保存到外部文件中,以便进一步检查和研究:

/* With OPTIONS MPRINT MFILE, the generated code is written to the file
   assigned to the fileref MPRINT. */
filename mprint "c:\temp\macro-source.sas" ;

/* Remove output left over from an earlier run. FDELETE takes a fileref,
   not a physical path, so the fileref is passed directly. A nonzero rc
   (for example when the file does not exist yet) is harmless here. */
%let rc = %sysfunc(fdelete(mprint));

options nomprint;
options mprint mfile;

处理每个数据集。使用数据集函数(OPEN、VARNUM、VARNAME、VARTYPE、ATTRN)访问数据集的元数据信息。

data _null_;
  /* One iteration per candidate table listed in scd_datasets. */
  set scd_datasets;

  /* Space-separated lists of numeric / character attribute columns. */
  length nvars cvars $32000;

  /* Columns a table must have to be treated as SCD-2. */
  rowid_var   = 'ROWID';
  from_dt_var = 'VALID_FROM_DT';
  to_dt_var   = 'VALID_TO_DT';
  scd_id_var  = trim(memname) || '_ID';
  scd_table   = catx('.',libname,memname);

  /* Tables that cannot be opened are skipped. */
  dsid = open (scd_table);
  if not dsid then return;

  if varnum(dsid, scd_id_var)
     and varnum(dsid, rowid_var)
     and varnum(dsid, from_dt_var)
     and varnum(dsid, to_dt_var)
  then do;

    nvars = '';
    cvars = '';

    /* Sort every remaining column into the numeric or character list. */
    do col = 1 to attrn(dsid,'nvar');
      col_name = upcase(varname(dsid,col));
      col_type = vartype(dsid,col);

      /* The id, row id and validity-date columns are not attributes. */
      if col_name = upcase(scd_id_var)
         or col_name = rowid_var
         or col_name = from_dt_var
         or col_name = to_dt_var
      then continue;

      if col_type = 'N' then nvars=catx(' ',nvars,col_name);
      else if col_type = 'C' then cvars=catx(' ',cvars,col_name);
    end;

    /* Queue the SCD-2 scanner for this table. %NRSTR delays the macro
       call so that it runs after this data step has finished. */
    call execute (
      '%nrstr(%scan_scd(' || 'data=' || trim(scd_table)
        || ', idvar=' || scd_id_var
        || ', nvars=' || trim(nvars)
        || ', cvars=' || trim(cvars)
        || '))'
    );
  end;

  dsid = close(dsid);
run;

/* Stop echoing macro-generated code and stop routing it to the file. */
options nomprint;
options nomfile;

/* Release the MPRINT fileref. */
filename mprint;

答案 1 :(得分:-1)

香hand K

是的,这是可以做到的,您需要创建一个通用的宏来完成此操作。请参见下面的示例,它基于您提供的样例数据。

/*
 * 1st macro - copy selected columns of a source table into a new table.
 *
 *   table   = name of the output data set
 *   columns = space-separated list of columns to keep in the output
 *   source  = input data set (default data_origin, the name that was
 *             previously hard-coded)
 */
%macro manytables(table, columns, source=data_origin);
    data &table. (keep = &columns.); /* keep only the requested columns */
        set &source.;
    run;
%mend manytables;


/*
 * Pause execution.
 *
 *   seconds = number of seconds to wait (default 30)
 *
 * SLEEP(n, unit) suspends for n periods of `unit` seconds each, so the
 * call below waits &seconds periods of one second.
 */
%macro waiting(seconds=30);
       data _null_;
              time_calc = sleep(&seconds., 1);
       run;
%mend waiting;


%let BD = 0; /* global table counter; the macro below resets it on every run */

/*
 * 2nd macro - generate the tables STUDENT_details_0 .. STUDENT_details_<n-1>.
 *
 *   n = number of tables to create (default 200)
 *
 * The counter is initialised by the iterative %DO itself, so the macro can
 * be invoked repeatedly; previously a second call did nothing because BD
 * was still 200 from the first run. BD remains a global macro variable and
 * equals &n when the macro finishes.
 */
%macro loop_200_tables(n=200);
    %global BD;

    %do BD = 0 %to %eval(&n. - 1);

         %waiting; /* pause between tables to avoid network and processing errors */

         /* create one table with the requested columns */
         %manytables(STUDENT_details_&BD., STUDENT_ID VALID_FROM_DT VALID_TO_DT NAME CITY CONTACT_NO BRANCH);

         %put STUDENT_details &BD. OK!;
         %PUT waiting new processing...;

    %end;

    %put finish;    
%MEND loop_200_tables;
%loop_200_tables;