我需要对一个包含数百万条记录的SAS表进行拆分,并按月份将其输出到多个SAS表中。例如,如果某个customer_id的有效期为年月(日期格式)201308至201408,则应为这一条记录创建约12个表(起止月份都计入时为13个),即有效期内的每个月各对应一个表。每个表都包含下面所示的列,外加一个新建的名为“YearMonth”的列,其值在各表中依次为201308、201309、201310等。
以下表格用于说明上述观点。
包含一个样本记录的原始表
Cust_ID Eff_YM Trm_YM
NH000001 201308 201408
新表201308
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201308
新表201309
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201309
新表201310
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201310
答案 0 :(得分:2)
创建示例数据集(dataset)。
/* Build the two-row sample table TEST: a customer id plus numeric
   effective / termination periods written as YYYYMM. */
data test;
    /* Fix the id at 10 characters up front, then read all three fields
       with plain list input from the in-stream records below. */
    length Cust_ID $10;
    input Cust_ID $ Eff_YM Trm_YM;
    datalines;
NH000001 201308 201408
NH000001 201308 201312
;
run;
从数据集(dataset)中选取最小(minimum)和最大(maximum)的时段。有多少个不同的时段,就会生成多少个数据集(datasets)。
/* Capture the earliest effective period and the latest termination
   period found in TEST as the macro variables min_Eff_YM / max_Trm_YM. */
proc sql noprint;
    select min(Eff_YM), max(Trm_YM)
        into :min_Eff_YM, :max_Trm_YM
        from test;
quit;
因为我们需要事先在data语句中指定各个数据集(datasets)的名称,所以在此先创建名称列表。
/* Build one row per calendar month from &min_Eff_YM through &max_Trm_YM
   (both inclusive). PERIOD holds the month as YYYYMM text and
   DATASET_NAME the matching output table name, dataset_YYYYMM. */
data dataset_names(keep=period dataset_name);
    length dataset_name $20.;
    format min_date date9. max_date date9.;
    /* Convert the numeric YYYYMM values explicitly (PUT, then INPUT with
       the YYMMN6. informat) instead of relying on automatic
       numeric/character conversion inside COMPRESS, SUBSTR and MDY. */
    min_date = input(put(&min_Eff_YM., 6.), yymmn6.);
    max_date = input(put(&max_Trm_YM., 6.), yymmn6.);
    do i = 0 to intck('month', min_date, max_date);
        period = put(intnx('month', min_date, i), yymmn6.);
        dataset_name = cats("dataset_", period);
        output;
    end;
run;
/* Publish the generated names as macro variables:
     all_datsets  - blank-separated list of the output table names
     num_datasets - number of rows counted in DATASET_NAMES
     all_periods  - comma-separated list of the periods */
proc sql noprint;
    select dataset_name
        into :all_datsets separated by " "
        from dataset_names;
    select count(dataset_name)
        into :num_datasets
        from dataset_names;
    select period
        into :all_periods separated by ","
        from dataset_names;
quit;
使用Eff_YM和Trm_YM,将每条记录按月展开为多行(每个月一行)。
/* Expand every TEST record into one row per month from Eff_YM through
   Trm_YM (both inclusive) and write the result to TEST_ALL, with the
   month stored in YearMonth as YYYYMM text.
   The YYMM parameter is not used by the macro body; it is kept so that
   existing calls keep working. */
%macro chk(YYMM);
/* The helper variables are dropped so TEST_ALL carries only the input
   columns plus YearMonth. */
data test_all(drop=_first_month _month_offset);
    set test;
    /* Derive the first month once per record with an explicit
       numeric -> text -> date conversion; ?? suppresses invalid-data
       messages when a period cannot be read. */
    _first_month = input(put(Eff_YM, 6.), ?? yymmn6.);
    do _month_offset = 0 to intck('month', _first_month, input(put(Trm_YM, 6.), ?? yymmn6.));
        YearMonth = put(intnx('month', _first_month, _month_offset), yymmn6.);
        output;
    end;
run;
%mend;
%chk;
/* Split TEST_ALL into one data set per period: each row is written to
   the table dataset_YYYYMM whose period equals the row's YearMonth.
   Relies on three macro variables being set beforehand: all_datsets
   (output table names), num_datasets (their count) and all_periods
   (comma-separated periods). */
%macro data_dates;
    %local i period;
    data &all_datsets.;
        set test_all;
        /* The period list is resolved while the step is generated, so each
           row is compared against literals and routed by one SELECT. */
        select (YearMonth);
        %do i = 1 %to &num_datasets.;
            %let period = %scan(%superq(all_periods), &i., %str(,));
            when ("&period.") output dataset_&period.;
        %end;
            /* Rows whose YearMonth matches no period are not written. */
            otherwise;
        end;
    run;
%mend;
%data_dates;
答案 1 :(得分:1)
/* Sample input: one row per customer with numeric YYYYMM effective and
   termination periods. */
data HAVE;
    /* The $8. informat on the first reference makes CUST_ID 8 characters long. */
    input CUST_ID :$8. Eff_YM Trm_YM;
    datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
获取最小和最大日期以用于构建所有可能的数据集
/* Store the earliest effective period and the latest termination period
   of HAVE in macro variables, then echo both to the log. */
proc sql noprint;
    select min(Eff_YM) into :min_Eff_YM from HAVE;
    select max(Trm_YM) into :max_Trm_YM from HAVE;
quit;
%Put min_EFF_YM= &min_EFF_YM;
%Put max_TRM_YM= &max_TRM_YM;
构建所有可能的数据集并创建用于循环的宏变量
/* One row per month from &min_EFF_YM through &max_TRM_YM. DSN is the
   output table name for that month: an underscore followed by YYYYMM.
   Diff (the month span) is kept in the output alongside DSN. */
data DSNs(drop=first_month last_month offset);
    first_month = input(put(&min_EFF_YM, 6.), yymmn6.);
    last_month  = input(put(&max_TRM_YM, 6.), yymmn6.);
    Diff = intck('month', first_month, last_month);
    /* Echo the month span to the log. */
    Put DIFF=;
    do offset = 0 to Diff;
        DSN = Cats("_", put(intnx('Month', first_month, offset, 'b'), yymmn6.));
        output;
    end;
run;
/* Load the table names into macro variables: cnt receives the row count
   of DSNs and all1 .. all<cnt> receive the names, one per variable. */
Proc sql noprint;
    Select count(dsn)
        into :cnt separated by ""
        from DSNs;
    Select dsn
        into :all1 - :all&cnt
        from DSNs;
Quit;
/* Show the count and the first and last table names in the log. */
%Put CNT: &cnt;
%Put ALL1: &all1;
%Put ALL&cnt: &&all&cnt;
创建数据集并插入适当的记录
/* Create one table per month (names taken from all1 .. all<cnt>) and
   write each HAVE record to every monthly table from its EFF_YM through
   its TRM_YM, both inclusive. Every output row carries CUST_ID, EFF_YM,
   TRM_YM and the numeric YYYYMM value YearMonth.
   Relies on the macro variables cnt and all1 .. all<cnt>. */
%Macro Create_Tables;
    %local i;
    Data %do i = 1 %to &cnt; &&all&i %end;
    ;
        set HAVE;
        /* The month range comes from the record itself rather than a
           fixed 0-12 offset, so shorter and longer ranges are handled. */
        _eff_dt = input(put(EFF_YM, 6.), yymmn6.);
        _trm_dt = input(put(TRM_YM, 6.), yymmn6.);
        do _m = 0 to intck('month', _eff_dt, _trm_dt);
            YearMonth_dt = intnx('month', _eff_dt, _m);
            YearMonth = input(put(YearMonth_dt, yymmn6.), 6.);
            YearMonth_dsn = cats("_", put(YearMonth_dt, yymmn6.));
            /* Route the row to the table named after its month. */
            select (YearMonth_dsn);
            %do i = 1 %to &cnt;
                when ("&&all&i") output &&all&i;
            %end;
                otherwise;
            end;
        end;
        Keep CUST_ID EFF_YM TRM_YM YEARMONTH;
    run;
%Mend;
%Create_Tables ;
答案 2 :(得分:1)
解决这个问题的方法很简单。先从原数据集创建一个新数据集,对每条记录从起始年月循环到结束年月,每个月输出一行。然后,根据这个新数据集中出现的不同年月生成一组宏变量,并循环这些宏变量来逐个创建数据集。
/* Sample input with the two YYYYMM periods read as SAS dates via the
   YYMMN6. informat and displayed as yyyy-mm-dd. */
data have;
    /* Both period columns share one informat through a grouped list. */
    input cust_id $ (eff_ym trm_ym) (:yymmn6.);
    format eff_ym trm_ym yymmdd10.;
    datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
/* Expand each HAVE record into one row per month from eff_ym through
   trm_ym; yearmonth is the SAS date that INTNX returns for that month. */
data staging;
    set have;
    /* Month indexes are counted from SAS date 0. */
    first_idx = intck('month', 0, eff_ym);
    last_idx  = intck('month', 0, trm_ym);
    do month_idx = first_idx to last_idx;
        yearmonth = intnx('month', 0, month_idx);
        output;
    end;
    format yearmonth yymmdd10.;
    drop first_idx last_idx month_idx;
run;
/* Split STAGING into one table per distinct yearmonth value, named
   data_YYYYMM. Each table is recreated on every call, so running the
   macro again does not add duplicate rows. */
%macro splitter;
    %local i dsn;
    /* Put each distinct month (as a DATE9. string) in yearmonth1,
       yearmonth2, ...; SQLOBS then holds how many were found. */
    proc sql noprint;
        select distinct yearmonth format=date9. into :yearmonth1-:yearmonth99999
        from staging;
    quit;
    %do i = 1 %to &sqlobs;
        /* Turn the DATE9. string into the YYYYMM suffix of the table name. */
        %let dsn=%sysfunc(putn(%sysfunc(inputn(&&yearmonth&i,date9.)),yymmn6.));
        data data_&dsn;
            set staging(where=(yearmonth="&&yearmonth&i"d));
        run;
    %end;
%mend splitter;
/* MPRINT writes the SAS code generated by the macro to the log. */
options mprint;
%splitter