Question

我需要对一个包含数百万条记录的SAS表进行拆分，并根据月份条件将其输出到多个SAS表中。例如，如果某个customer_id在年月（日期格式）201308到201408之间有效，则应为这一条记录创建12个表。每个表都包含下面列出的字段，以及一个新建的名为“YearMonth”的列：该列在第一个表中的值为201308，在后续各表中依次为201309、201310等。

以下表格用于说明上述观点。

包含一个样本记录的原始表

Cust_ID     Eff_YM  Trm_YM   
NH000001    201308  201408

新表201308

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201308

新表201309

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201309

新表201310

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201310

Answer 1

创建示例dataset。

/* Sample input: one customer with two validity windows.
   Eff_YM and Trm_YM are read as plain numbers in YYYYMM form. */
data test;
    infile datalines;
    input
        Cust_ID :$10.
        Eff_YM  :8.
        Trm_YM  :8.
    ;
    datalines;
NH000001    201308  201408
NH000001    201308  201312
;
run;

从数据集中选取最小和最大的时段。有多少个不同的时段，就会生成多少个不同的数据集。

/* Store the earliest start month and the latest end month of TEST
   in the macro variables min_Eff_YM and max_Trm_YM. */
proc sql noprint;
    select min(Eff_YM), max(Trm_YM)
        into :min_Eff_YM, :max_Trm_YM
        from test;
quit;

因为我们需要事先在data语句中指定各个数据集（datasets）的名称，所以在此创建名称列表。

/* Build one row per calendar month between the overall minimum start month
   (&min_Eff_YM) and the overall maximum end month (&max_Trm_YM).
   Output columns:
     dataset_name - name of the table that will receive that month's rows
     period       - the month as a 6-character YYYYMM string */
data dataset_names(keep=period dataset_name);
    length dataset_name $20.;
    format min_date date9. max_date date9.;

    /* Convert the numeric YYYYMM values to first-of-month SAS dates with an
       explicit put/input pair, so the step does not depend on implicit
       numeric-to-character conversions. */
    min_date = input(put(&min_Eff_YM., 6.), yymmn6.);
    max_date = input(put(&max_Trm_YM., 6.), yymmn6.);

    /* Number of month boundaries between the two dates; the loop below is
       inclusive of both end points. */
    no_of_months = intck('month', min_date, max_date);

    do i = 0 to no_of_months;
        period = put(intnx('month', min_date, i), yymmn6.);
        dataset_name = cats("dataset_", period);
        output;
    end;
run;


/* Publish the contents of DATASET_NAMES as macro variables:
     num_datasets - number of rows with a non-missing dataset_name
     all_datsets  - blank-separated list of table names
     all_periods  - comma-separated list of YYYYMM periods */
proc sql noprint;
    select count(dataset_name)
        into :num_datasets
        from dataset_names;

    select dataset_name, period
        into :all_datsets separated by " ",
             :all_periods separated by ","
        from dataset_names;
quit;

使用Eff_YM和Trm_YM之间的间隔，为每条记录创建所有可能的月份记录列表

%macro chk(YYMM);
    /* Expand every row of TEST into one row per month between Eff_YM and
       Trm_YM (both inclusive) and write the result to TEST_ALL.
       YearMonth is added as a 6-character YYYYMM string.
       The YYMM parameter is not used by the macro body; it is kept only so
       that existing calls remain valid. */
    data test_all(drop=_start_dt _end_dt _offset);
        set test;

        /* Convert the numeric YYYYMM values to first-of-month SAS dates once
           per row, using explicit conversions. */
        _start_dt = input(put(Eff_YM, 6.), yymmn6.);
        _end_dt   = input(put(Trm_YM, 6.), yymmn6.);

        /* Helper variables are dropped so that only the source columns and
           YearMonth reach the output table. */
        do _offset = 0 to intck('month', _start_dt, _end_dt);
            YearMonth = put(intnx('month', _start_dt, _offset), yymmn6.);
            output;
        end;
    run;
%mend;
%chk;

根据时段（period）名称，将数据集拆分为各个单独的数据集

%macro data_dates;
    /* Split TEST_ALL into one table per period in a single pass.
       Reads the macro variables all_datsets (blank-separated table names),
       all_periods (comma-separated YYYYMM values) and num_datasets.
       Each row is written to dataset_<YearMonth>. */
    %local i period;

    data &all_datsets.;
        set test_all;

        /* SELECT routes each row to a single table and stops at the first
           match instead of testing every period for every row. */
        select (YearMonth);
        %do i = 1 %to &num_datasets.;
            /* %bquote masks the commas inside the list so %scan sees it as
               one argument. */
            %let period = %scan(%bquote(&all_periods.),&i.,%str(,));
            when ("&period.") output dataset_&period.;
        %end;
            otherwise;
        end;
    run;
%mend;
%data_dates;

Answer 2

/* Sample input for this answer: two customers, Eff_YM / Trm_YM stored as
   numeric YYYYMM values. */
data HAVE;
    length CUST_ID $8;
    input Cust_ID $ Eff_YM Trm_YM;
    datalines;
NH000001    201308  201408
NH000002    201301  201401
;
run;

获取最小和最大日期以用于构建所有可能的数据集

/* Put the smallest Eff_YM and the largest Trm_YM of HAVE into macro
   variables, then echo both values to the log. */
proc sql noprint;
    select min(Eff_YM) into :min_Eff_YM from HAVE;
    select max(Trm_YM) into :max_Trm_YM from HAVE;
quit;
%put min_EFF_YM= &min_EFF_YM;
%put max_TRM_YM= &max_TRM_YM;

构建所有可能的数据集并创建用于循环的宏变量

/* One row per month between &min_EFF_YM and &max_TRM_YM (inclusive).
   DSN holds the target table name for that month, e.g. _201308.
   Diff (the month span) is kept in the output and also written to the log. */
data DSNs(drop=first_month offset);
    first_month = input(put(&min_EFF_YM, 6.), yymmn6.);
    Diff = intck('month', first_month, input(put(&max_TRM_YM, 6.), yymmn6.));
    put DIFF=;

    do offset = 0 to Diff;
        DSN = cats("_", put(intnx('Month', first_month, offset, 'b'), yymmn6.));
        output;
    end;
run;

/* Load the table names from DSNs into macro variables all1..all<cnt>;
   cnt holds the number of names. The %put lines echo the count and the
   first and last names to the log. */
proc sql noprint;
    select count(dsn)
        into :cnt separated by ""
        from DSNs;

    select dsn
        into :all1 - :all&cnt
        from DSNs;
quit;
%put CNT: &cnt;
%put ALL1: &all1;
%put ALL&cnt: &&all&cnt;

创建数据集并插入适当的记录

%macro Create_Tables;
    /* Create every table named in all1..all<cnt> in one pass over HAVE.
       Each input row is expanded into one row per month from Eff_YM to
       Trm_YM (inclusive) and written to the table for that month.
       Output columns: CUST_ID, EFF_YM, TRM_YM and YEARMONTH (numeric YYYYMM). */
    %local j;

    data %do j = 1 %to &cnt; &&all&j %end;
        ;
        set HAVE;

        /* First-of-month SAS dates for the row's validity window. */
        _eff_dt = input(put(EFF_YM, 6.), yymmn6.);
        _trm_dt = input(put(TRM_YM, 6.), yymmn6.);

        /* The number of months is taken from the row itself rather than
           assuming a fixed 13-month span. */
        do _k = 0 to intck('month', _eff_dt, _trm_dt);
            YearMonth_dt  = intnx('month', _eff_dt, _k);
            YearMonth     = input(put(YearMonth_dt, yymmn6.), 6.);
            YearMonth_dsn = cats("_", put(YearMonth_dt, yymmn6.));

            /* Route the row to the single table whose name matches. */
            select (YearMonth_dsn);
            %do j = 1 %to &cnt;
                when ("&&all&j") output &&all&j;
            %end;
                otherwise;
            end;
        end;

        /* Only these four columns are written; all helpers are excluded. */
        keep CUST_ID EFF_YM TRM_YM YEARMONTH;
    run;
%mend;
%Create_Tables ;

Answer 3

解决问题的方法很简单。先从原数据集创建一个新数据集，对每条记录从起始年月到结束年月逐月循环输出。然后根据这个新数据集中出现的不重复年月生成一组宏变量，并循环这些宏变量来创建各个数据集。

/* Sample input: eff_ym and trm_ym are read with the yymmn6. informat, so
   they are stored as SAS dates and displayed as yyyy-mm-dd. */
data have;
    input
        cust_id $
        eff_ym :yymmn6.
        trm_ym :yymmn6.
    ;
    format eff_ym trm_ym yymmdd10.;
    datalines;
NH000001    201308  201408
NH000002    201301  201401
;
run;

/* Expand each row of HAVE into one row per month from eff_ym to trm_ym
   (inclusive); yearmonth holds the first day of that month. */
data staging;
    drop month_idx;
    set have;

    /* Months are counted from SAS date 0, so the loop index is an absolute
       month number that intnx turns back into a date. */
    do month_idx = intck('month', 0, eff_ym) to intck('month', 0, trm_ym);
        yearmonth = intnx('month', 0, month_idx);
        output;
    end;

    format yearmonth yymmdd10.;
run;
%macro splitter;
    /* Write the rows of STAGING to one table per distinct yearmonth value,
       named data_<YYYYMM>.
       NOTE: PROC APPEND adds rows to a base table that already exists, so
       running the macro twice without deleting the data_* tables appends
       the same rows again. */

    /* Keep the loop index and the name holder local so that same-named
       macro variables in the calling scope are not overwritten. */
    %local i dsn;

    /* One macro variable per distinct month, formatted as DATE9 text. */
    proc sql noprint;
        select distinct yearmonth format=date9.
            into :yearmonth1-:yearmonth99999
            from staging;
    quit;

    /* &sqlobs is set by the PROC SQL step above. */
    %do i = 1 %to &sqlobs;
        /* Turn the DATE9 text back into a date, then into a YYYYMM suffix. */
        %let dsn=%sysfunc(putn(%sysfunc(inputn(&&yearmonth&i,date9.)),yymmn6.));

        proc append base=data_&dsn
                    data=staging(where=(yearmonth="&&yearmonth&i"d));
        run;
    %end;

%mend splitter;
options mprint;
%splitter

SAS：如果满足日期条件，则循环和输出记录

3 个答案: