拆分SAS数据集以进行批处理的最快方法是什么?

时间:2012-02-24 22:12:56

标签: sas batch-processing

我有一个很大的SAS数据集(约150万条观测,约250个变量),需要将它拆分成若干个大小相同的SAS数据集以便进行批处理。每个数据集都需要包含所有变量,但只包含一部分观测。最快的做法是什么?

2 个答案:

答案 0 :(得分:2)

您可以执行以下操作:

/*
 * splitds: split one data set into SPLITNUM consecutive, roughly equal-sized
 * data sets in a single pass over the input.
 *
 *   inlib=    libref of the input data set (defaults to WORK when blank)
 *   inds=     member name of the input data set
 *   splitnum= number of output data sets to create (integer >= 1)
 *   outid=    name prefix of the outputs; they are named <outid>1 .. <outid>N
 *
 * Observation k is written to the first output i for which
 * k <= i * (nobs / splitnum); whatever remains goes to the last output.
 */
%macro splitds(inlib=,inds=,splitnum=,outid=);
  /* Keep working variables out of the caller's symbol table. */
  %local nobs i;

  %if %length(&inlib.) = 0 %then %let inlib = work;

  %if &splitnum. < 1 %then %do;
    %put ERROR: splitnum must be an integer >= 1 (got &splitnum.).;
    %return;
  %end;

  /* DICTIONARY.TABLES is the table behind the SASHELP.VTABLE view; */
  /* querying it directly lets PROC SQL apply the WHERE clause early. */
  proc sql noprint;
    select nobs into :nobs
    from dictionary.tables
    where libname=upcase("&inlib") and memname=upcase("&inds");
  quit;

  /* No row selected means the data set was not found. */
  %if &sqlobs. = 0 %then %do;
    %put ERROR: Data set &inlib..&inds. was not found.;
    %return;
  %end;

  /* Re-assign to strip the leading blanks left by INTO. */
  %let nobs = &nobs.;
  %put Number of observations in &inlib..&inds.: &nobs;

  data
    %do i=1 %to &splitnum.;
      &outid.&i.
    %end;
    ;
    /* Read from the requested library, not from a one-level name. */
    set &inlib..&inds.;
    /* Build an IF / ELSE IF chain for outputs 1 .. splitnum-1. */
    %do i=1 %to %eval(&splitnum.-1);
      %if &i. > 1 %then %do; else %end;
      if _n_ <= ((&nobs. / &splitnum.) * &i.) then output &outid.&i.;
    %end;
    /* The last output takes the remainder. With splitnum=1 there is no */
    /* preceding IF, so the ELSE keyword must be omitted. */
    %if &splitnum. > 1 %then %do; else %end;
    output &outid.&splitnum.;
  run;
%mend splitds;

将MYLIB.MYDATA拆分为10个名为NEWDATA1 - NEWDATA10的数据集的示例调用将是:

/* Example: pass the source libref and member name, the number of pieces, */
/* and the name prefix used for the output data sets. */
%splitds(inlib=mylib,inds=mydata,splitnum=10,outid=newdata);

答案 1 :(得分:1)

试试这个。我还没有测试过,所以其中可能存在错误。您需要修改对BATCH_PROCESS的宏调用,填入输入数据集的名称、输出数据集名称的前缀以及要生成的数据集数量。

/*
 * nobs: function-style macro that resolves to the NOBS attribute of a data
 * set, i.e. its physical observation count as reported by ATTRN.
 *
 *   dsn  name of the data set to inspect (one- or two-level)
 *
 * Resolves to 0 and writes a message to the log when the data set cannot
 * be opened. Use inline, e.g.  %let n = %nobs(work.mydata);
 */
%macro nobs (dsn);
   %local nobs dsid rc;
   %let nobs=0;
   %let dsid = %sysfunc(open(&dsn));
   %if &dsid %then %do;
      %let nobs = %sysfunc(attrn(&dsid,NOBS));
      /* Close only a handle that was actually opened. */
      %let rc   = %sysfunc(close(&dsid));
   %end;
   %else %put Open for dataset &dsn failed - %sysfunc(sysmsg());
   /* No trailing semicolon: this text is the macro's return value. */
   &nobs
%mend nobs;

/*
 * batch_process: split a data set into NUMBER_OF_DSNS consecutive,
 * roughly equal-sized data sets in one pass over the input.
 *
 *   dsn_in          input data set name (positional)
 *   dsn_out_prefix  name prefix of the outputs; they are named
 *                   <prefix>1 .. <prefix>N (positional)
 *   number_of_dsns  number of output data sets, integer >= 1 (positional)
 *
 * Relies on the %nobs macro for the observation count of the input.
 * Observation k goes to the first output i with k <= i * total / N; the
 * last output takes the remainder, so no observation is ever dropped even
 * when the total is not evenly divisible by N.
 */
%macro batch_process(dsn_in,dsn_out_prefix,number_of_dsns);
  /* Keep working variables out of the caller's symbol table. */
  %local i dsn_obs;

  %if &number_of_dsns < 1 %then %do;
    %put ERROR: number_of_dsns must be an integer >= 1 (got &number_of_dsns).;
    %return;
  %end;

  /* Invoke the %nobs macro; an ampersand here would be a variable reference. */
  %let dsn_obs = %nobs(&dsn_in);

  data
     %do i = 1 %to &number_of_dsns;
        &dsn_out_prefix.&i
     %end;
     ;
     set &dsn_in;
     /* IF / ELSE IF chain over outputs 1 .. N-1, so each row stops at its */
     /* first match. The cut-off is evaluated by the DATA step in floating */
     /* point, which avoids gaps between fractional range boundaries. */
     %do i = 1 %to %eval(&number_of_dsns - 1);
        %if &i > 1 %then %do; else %end;
        if _n_ <= (&i * &dsn_obs / &number_of_dsns) then output &dsn_out_prefix.&i;
     %end;
     /* Catch-all for the final output; no ELSE when it is the only one. */
     %if &number_of_dsns > 1 %then %do; else %end;
     output &dsn_out_prefix.&number_of_dsns;
  run;

%mend batch_process;

/* batch_process declares positional parameters, so the arguments are passed */
/* by position: input data set, output name prefix, number of data sets. */
%batch_process(DSN_NAME, PREFIX_, 5);