我们有将近200张SCD Type 2表,每张表大约有250列。
例如:学生详细信息表
STUDENT_ID VALID_FROM_DT VALID_TO_DT NAME CITY CONTACT_NO BRANCH
1 04-April-2018 10-April-2018 XYZ Chennai 12345 CSE
1 10-April-2018 31-DEC-2055 XYZ MUMBAI 87777 CSE
我正在寻找一种通用的解决方案:只保留前后记录中取值不一致(即发生了变化)的列。对于上面的例子,输出应为
输出:
STUDENT_ID VALID_FROM_DT VALID_TO_DT CITY CONTACT_NO
1 04-April-2018 10-April-2018 Chennai 12345
1 10-April-2018 31-DEC-2055 MUMBAI 87777
这种解决方案是否可行?如果可行,我希望能把它用于我的全部200张表。
答案 0 :(得分:0)
维表数量很多,每张维表又有大量属性列,这是一项不小的工程。要实现通用解决方案,您需要利用元数据信息:先获取库中数据集的列表,再检查这些数据集是否带有“SCD-2特征”(例如,存在一个与数据集名称相近的ID列)。对于每张SCD-2表,再次通过元数据取得非SCD列(即除记录ID、维度ID和有效日期范围之外的所有列)。然后编写一个遍历数据集的处理过程,找出属性值与上一个有效日期范围相比发生变化的情况。
下面先生成一些SCD-2测试数据:维表名称是随机的,属性(即列)名称也是随机的。每个组中有一部分列被强制设为“静态”(即取值不变的列,也就是您在问题中希望排除的那些列)。
/*
 * random_name: macro function that expands to a string of LEN random
 * upper-case letters (A-Z).
 *
 *   len=  number of letters to generate (default 8)
 *
 * Each letter comes from one RANUNI draw scaled onto the character codes
 * 65..90 and converted with BYTE.
 */
%macro random_name(len=8);
  %local pos letters;
  %let pos = 1;
  %do %while (&pos <= &len);
    %let letters = &letters.%sysfunc(byte(%sysfunc(ranuni(123))*26+65));
    %let pos = %eval(&pos + 1);
  %end;
&letters
%mend random_name;
/*
 * make_data: generate N demo SCD-2 tables with random names and random
 * attribute columns.
 *
 *   lib=  library the tables are written to (default WORK)
 *   N=    number of tables to generate (default 40)
 *
 * Each table is named dimtable_<8 random letters> and contains:
 *   rowid                        running row number
 *   <table>_id                   dimension id; a new id starts on row 1 and
 *                                then with probability 0.2 per row
 *   valid_from_dt / valid_to_dt  consecutive validity ranges
 *   nattr<i>_<random>            0..19 numeric attribute columns
 *   cattr<i>_<random>            0..19 character attribute columns
 * For every id each attribute is flagged "static" with probability 0.4;
 * static attributes keep the value assigned on the first row of the id,
 * all others get a fresh random value on every row.
 *
 * Requires the %random_name macro.
 */
%macro make_data(lib=WORK, N=40);
  %local outcount i out idvar top cCount nCount namelen;

  %do outcount = 1 %to &N;
    %let out   = dimtable_%random_name();
    %let idvar = &out._id;

    /* Random row count and random number of numeric / character attributes. */
    %let top    = %sysevalf(1000 * %sysfunc(ranuni(123)), FLOOR);
    %let nCount = %sysevalf(  20 * %sysfunc(ranuni(123)), FLOOR);
    %let cCount = %sysevalf(  20 * %sysfunc(ranuni(123)), FLOOR);

    /* Random column names, 1 to 16 letters after the nattr<i>_ or cattr<i>_ prefix. */
    %do i = 1 %to &nCount;
      %let namelen = %sysevalf(16 * %sysfunc(ranuni(123)), CEIL);
      %local numvar&i;
      %let numvar&i = nattr&i._%random_name(len=&namelen);
    %end;
    %do i = 1 %to &cCount;
      %let namelen = %sysevalf(16 * %sysfunc(ranuni(123)), CEIL);
      %local chrvar&i;
      %let chrvar&i = cattr&i._%random_name(len=&namelen);
    %end;

    data &lib..&out;
      do rowid = 1 to &top;

        /* Start a new dimension id: reset the date range, redraw the static flags
           and assign initial attribute values. */
        if rowid = 1 or ranuni(123) > 0.8 then do;
          &idvar + 1;
          valid_from_dt = 0;
          valid_to_dt = '01-jan-1970'd + floor(60 * ranuni(123));

          format valid: yymmdd10.;

          /* ATTRIB and ARRAY are only generated when there is at least one column of
             that type; an empty variable list would not compile. */
          %if &nCount or &cCount %then %do;
            attrib
              %if &nCount %then %do; %do i = 1 %to &nCount; &&numvar&i %end; length=8 format=6. %end;
              %if &cCount %then %do; %do i = 1 %to &cCount; &&chrvar&i %end; length=$20 %end;
            ;
          %end;

          %if &nCount %then %do; array num %do i = 1 %to &nCount; &&numvar&i %end; ; %end;
          %if &cCount %then %do; array chr %do i = 1 %to &cCount; &&chrvar&i %end; ; %end;

          /* Lower bound 0 keeps the declarations valid when a count is 0. */
          array staticN[0:&nCount] _temporary_;
          array staticC[0:&cCount] _temporary_;

          do _n_ = 1 to hbound(staticN); staticN(_n_) = ranuni(123) < 0.40; end;
          do _n_ = 1 to hbound(staticC); staticC(_n_) = ranuni(123) < 0.40; end;

          %if &nCount %then %do;
            do _n_ = 1 to dim(num);
              num(_n_) = CEIL (1000 * ranuni(123));
            end;
          %end;
          %if &cCount %then %do;
            do _n_ = 1 to dim(chr);
              chr(_n_) = repeat(byte(65+26*ranuni(123)), 15 * ranuni(123));
            end;
          %end;
        end;

        /* Next validity range starts the day after the previous one ended. */
        valid_from_dt = valid_to_dt + 1;
        valid_to_dt = valid_from_dt + ceil(60 * ranuni(123));

        /* Non-static attributes change on every row. */
        %if &nCount %then %do;
          do _n_ = 1 to dim(num);
            if not staticN(_n_) then num(_n_) = CEIL (1000 * ranuni(123));
          end;
        %end;
        %if &cCount %then %do;
          do _n_ = 1 to dim(chr);
            if not staticC(_n_) then chr(_n_) = repeat(byte(65+26*ranuni(123)), 15 * ranuni(123));
          end;
        %end;

        output;
      end;
    run;
  %end;
%mend make_data;
/* Echo the SAS statements generated by macros to the log. */
options mprint;
/* Library that holds the SCD-2 tables to be generated and scanned. */
%let SCD2_LIB = WORK;
/* CAUTION: KILL deletes every member of type DATA in the library, so the demo starts empty. */
proc datasets nolist noprint lib=&SCD2_LIB mt=data kill;
run;
quit;
/* Generate a single random SCD-2 demo table in that library. */
%make_data(lib=&SCD2_LIB, n=1)
下面编写一个通用的SCD-2检查宏:当属性值与上一个有效日期范围相比没有变化时,在输出表的对应位置写入缺失值(空白)。这种输出形式算不上规整,但视觉效果很好:通过这些“空缺”可以一眼看出哪些信息没有变化。
该宏由另一个处理步骤调用:该步骤负责发现SCD-2表,并确定需要检查取值是否变化的数值型和字符型属性变量。输出保存在名为 *_changes 的相应数据集中。
/*
 * scan_scd: compare every attribute of an SCD-2 table with its value in the
 * previous row of the same dimension id and write the result to
 * <data>_changes.
 *
 *   data=   SCD-2 table to scan; must be sorted by IDVAR and contain the
 *           columns rowid, valid_from_dt and valid_to_dt
 *   idvar=  name of the dimension id column (BY variable)
 *   nvars=  blank-separated list of numeric attribute columns
 *   cvars=  blank-separated list of character attribute columns
 *
 * Output: every row of DATA plus, for each attribute X, a column X_was that
 * holds the previous value of X when X changed and is missing (blank) when
 * X is unchanged or the row is the first one of its id. The helper columns
 * ___nprv<i> / ___cprv<i> carry the previous value from row to row.
 *
 * If both lists are empty the macro writes a NOTE and creates nothing.
 */
%macro scan_scd(data=, idvar=, nvars=, cvars=);
  %local i nCount cCount;

  /* COUNTW cannot be called through SYSFUNC with an empty first argument,
     so an empty list is counted as 0 without calling it. */
  %let nCount = 0;
  %let cCount = 0;
  %if %length(&nvars) %then %let nCount = %sysfunc(countw(&nvars, %str( )));
  %if %length(&cvars) %then %let cCount = %sysfunc(countw(&cvars, %str( )));

  /* Nothing to compare: the RENAME= lists generated below would be empty. */
  %if %eval(&nCount + &cCount) = 0 %then %do;
    %put NOTE: scan_scd - no attribute columns were supplied for &data, nothing to do.;
    %return;
  %end;

  %do i = 1 %to &nCount; %local nvar&i; %let nvar&i = %scan(&nvars, &i, %str( )); %end;
  %do i = 1 %to &cCount; %local cvar&i; %let cvar&i = %scan(&cvars, &i, %str( )); %end;

  data &data._changes;
    /* Never executed; only fixes the order of the leading columns. */
    if 0 then set &data(keep=rowid &idvar valid_from_dt valid_to_dt);

    /* Places each attribute next to its _was companion in the output. */
    retain
      %do i = 1 %to &nCount; &&nvar&i &&nvar&i.._was %end;
      %do i = 1 %to &cCount; &&cvar&i &&cvar&i.._was %end;
    ;

    /* The two OBS=0 copies contribute no rows; they only define the helper and
       _was columns with the same type and length as the attributes. */
    set
      &data (obs=0 rename=(
        %do i = 1 %to &nCount; &&nvar&i = ___nprv&i %end;
        %do i = 1 %to &cCount; &&cvar&i = ___cprv&i %end;
      ))
      &data (obs=0 rename=(
        %do i = 1 %to &nCount; &&nvar&i = &&nvar&i.._was %end;
        %do i = 1 %to &cCount; &&cvar&i = &&cvar&i.._was %end;
      ))
      &data;
    by &idvar.;

    if first.&idvar. then do;
      /* First row of an id: remember the values, nothing to compare against. */
      %do i = 1 %to &nCount; ___nprv&i = &&nvar&i; %end;
      %do i = 1 %to &nCount; &&nvar&i.._was = .; %end;
      %do i = 1 %to &cCount; ___cprv&i = &&cvar&i; %end;
      %do i = 1 %to &cCount; &&cvar&i.._was = ''; %end;
    end;
    else do;
      /* Unchanged -> missing; changed -> previous value. Then remember the current value. */
      %do i = 1 %to &nCount; &&nvar&i.._was = ifn (&&nvar&i = ___nprv&i, ., ___nprv&i); %end;
      %do i = 1 %to &cCount; &&cvar&i.._was = ifc (&&cvar&i = ___cprv&i,'', ___cprv&i); %end;
      %do i = 1 %to &nCount; ___nprv&i = &&nvar&i; %end;
      %do i = 1 %to &cCount; ___cprv&i = &&cvar&i; %end;
    end;
  run;
%mend scan_scd;
scd-2发现和扫描调用
首先收集候选库中所有表的名称。
/* Collect the names of all tables in the candidate library.
   DICTIONARY.TABLES stores LIBNAME in upper case, so the macro variable is
   upper-cased before the comparison; a lower-case value would otherwise
   silently match no rows. */
proc sql;
  create table scd_datasets as
  select libname, memname
  from dictionary.tables
  where libname = "%upcase(&SCD2_LIB)"
  ;
quit;
文件引用(fileref)mprint 配合选项 mfile,可以把宏生成的代码保存到外部文件中,以便进一步检查和研究。
/* MFILE routes MPRINT output to the file behind the fileref MPRINT. */
filename mprint "c:\temp\macro-source.sas" ;
/* Remove output left by an earlier run. FDELETE expects a fileref, not a
   physical path, so the fileref is passed directly. A non-zero return code
   (for example when the file does not exist yet) is harmless here. */
%let rc = %sysfunc(fdelete(mprint));
options nomprint;
options mprint mfile;
逐个处理每个数据集,使用数据集函数(open、varnum、attrn、varname、vartype)访问其元数据信息。
/*
 * Driver step: for every table listed in scd_datasets, open it, check that
 * it has the four SCD-2 key columns (<memname>_ID, ROWID, VALID_FROM_DT,
 * VALID_TO_DT) and, if so, queue one %scan_scd call for it through
 * CALL EXECUTE. All remaining columns are passed as attributes, split into
 * a numeric and a character list.
 */
data _null_;
  set scd_datasets;

  length attr_name $32 attr_type $1 num_list char_list $32000;

  table_ref = catx('.', libname, memname);
  id_col    = trim(memname) || '_ID';

  dsid = open(table_ref);

  /* Table could not be opened: skip it. */
  if not dsid then return;

  has_scd_keys = varnum(dsid, id_col)          > 0
             and varnum(dsid, 'ROWID')         > 0
             and varnum(dsid, 'VALID_FROM_DT') > 0
             and varnum(dsid, 'VALID_TO_DT')   > 0;

  if has_scd_keys then do;
    num_list  = '';
    char_list = '';

    /* Every column that is not one of the four key columns is an attribute. */
    do col = 1 to attrn(dsid, 'nvar');
      attr_name = upcase(varname(dsid, col));
      attr_type = vartype(dsid, col);

      if not (   attr_name = upcase(id_col)
              or attr_name = 'ROWID'
              or attr_name = 'VALID_FROM_DT'
              or attr_name = 'VALID_TO_DT') then do;
        select (attr_type);
          when ('N') num_list  = catx(' ', num_list,  attr_name);
          when ('C') char_list = catx(' ', char_list, attr_name);
          otherwise;
        end;
      end;
    end;

    /* Queue the scanner call for this table. NRSTR delays execution of the
       macro until the current DATA step has finished. */
    call execute(
      '%nrstr(%scan_scd(data=' || trim(table_ref)  ||
      ', idvar='               || id_col           ||
      ', nvars='               || trim(num_list)   ||
      ', cvars='               || trim(char_list)  ||
      '))'
    );
  end;

  rc = close(dsid);
run;
/* Stop routing MPRINT output to the external file and switch MPRINT off again. */
options nomfile nomprint;
/* NOTE(review): FILENAME with no further arguments presumably releases the mprint
   fileref; confirm (the explicit form is FILENAME mprint CLEAR). */
filename mprint;
答案 1 :(得分:-1)
香hand K
是的,这是可以做到的:您需要编写一个通用的宏来完成这项工作。请看下面的示例,其中用到了您提供的样例数据。
/*
 * manytables: copy the source table DATA_ORIGIN into TABLE, keeping only
 * the listed columns.
 *
 *   table    name of the data set to create
 *   columns  blank-separated list of columns to keep
 */
%macro manytables(table, columns);
  data &table.;
    set data_origin;      /* name of your source table */
    keep &columns.;       /* only these columns are written to the output */
  run;
%mend manytables;
/*
 * waiting: pause execution before the next step.
 *
 *   seconds=  length of the pause in seconds (default 30)
 */
%macro waiting(seconds=30);
  data _null_;
    /* SLEEP(n, unit): one period whose unit is SECONDS seconds. */
    time_calc = sleep(1, &seconds);
  run;
%mend waiting;
%let BD = 0; /* global loop counter used by loop_200_tables as the table suffix; starts at 0 */
/*
 * loop_200_tables: create the tables STUDENT_details_0 .. STUDENT_details_<n-1>
 * by calling %manytables once per table, pausing with %waiting before each call.
 *
 *   n=  number of tables to create (default 200)
 *
 * The counter BD is set by the loop itself, so the macro always starts at 0
 * and can be invoked repeatedly. On completion BD equals N.
 */
%macro loop_200_tables(n=200);
  %do BD = 0 %to %eval(&n - 1);
    %waiting; /* pause to avoid network errors and processing overload */

    /* create one table from the source data */
    %manytables(STUDENT_details_&BD., STUDENT_ID VALID_FROM_DT VALID_TO_DT NAME CITY CONTACT_NO BRANCH);

    %put STUDENT_details &BD. OK!;
    %PUT waiting new processing...;
  %end;
  %put finish;
%MEND loop_200_tables;
%loop_200_tables; /* run the batch that creates the STUDENT_details_<n> tables */