哈希合并宏-使用文件记录指示器“哈希+点=键”

时间:2018-11-07 18:23:58

标签: hash merge sas sas-macro

我正在尝试把这个宏改写为“Hash + Point = Key”的方式。在我们的某一次数据运行中,当前版本的宏已经开始超出内存限制。之所以来求助,是因为我时间不多,而且从未真正分析过这段代码——它直到最近才成为我流程的一部分。

我参考的是这篇论文:https://www.lexjansen.com/nesug/nesug11/ld/ld01.pdf 。我真正不明白的是其中的RID(记录号)是如何设置的,以及如何把它整合进我们的宏。实际上,我甚至不确定现有的宏能否用这种方式来实现。

任何帮助将不胜感激。

%macro hashmerge2(varnm,onto,from,byvars,obsqty);
/*-----------------------------------------------------------------------------
Merge the variables in VARNM from table FROM onto table ONTO with a hash lookup.

  varnm   Space delimited list of variables to retrieve
  onto    Dataset to update (rewritten by this step)
  from    Dataset loaded into the hash object
  byvars  Space delimited list of key variables to match on
  obsqty  Not referenced by this macro; kept so existing calls still work

Outputs:
  &onto           every input row; VARNM variables are missing when no match
  miss<prefix>    the unmatched rows only; <prefix> is the first (up to) four
                  characters of the first name in VARNM
The variable FROM holds the name of the table that supplied the row's values.
-----------------------------------------------------------------------------*/

/* Collapse repeated blanks so every blank separates exactly two names. */
%let data_vars   = %sysfunc(compbl(&varnm));
/* a b -> a" , "b   (wrapped in outer quotes where it is used) */
%let data_vars_a = %sysfunc(tranwrd(&data_vars.,%str( ),%str(" , ")));
/* a b -> a,b       (argument list for CALL MISSING) */
%let data_vars_b = %sysfunc(tranwrd(&data_vars.,%str( ),%str(,)));
%let data_key    = %sysfunc(compbl(&byvars));
%let data_key    = %sysfunc(tranwrd(&data_key.,%str( ),%str(" , ")));

/* Take the first word with %SCAN.  The previous %INDEX(&varnm,' ') looked   */
/* for the three characters quote-blank-quote (quotes are ordinary text in   */
/* macro code), so it never found the word break.                            */
%let varnm3 = %scan(&data_vars,1,%str( ));
%if %length(&varnm3) > 4 %then %let varnm3 = %substr(&varnm3,1,4);

data &onto(drop=rc) miss&varnm3(drop=rc);
  /* Compile-time only: put all variables in the PDV for the hash object. */
  if 0 then set &onto &from(keep=&varnm. &byvars.);

  declare hash h_merge (dataset: "&from.");
  rc = h_merge.DefineKey  ("&data_key.");
  rc = h_merge.DefineData ("&data_vars_a.");
  rc = h_merge.DefineDone ();

  do until (eof);
    set &onto end = eof;
    /* Clear values carried over from the previous row before the lookup. */
    call missing(&data_vars_b.);
    rc = h_merge.find ();
    if rc = 0 then do;
      /* Set the flag BEFORE writing, otherwise the row gets the old flag. */
      from = "&from.";
      output &onto;
    end;
    else do;
      from = "&onto.";
      output miss&varnm3 &onto;
    end;
  end;

  stop;
run;

%mend hashmerge2;

1 个答案:

答案 0 :(得分:3)

我认为这就是您要找的做法,不过它仍然需要把“lookup”表中的所有键值加载到哈希对象中。它节省空间的地方在于:哈希对象里只存放与键变量对应的观测号,而不加载任何非键变量。

%macro hash_merge_point
/*-----------------------------------------------------------------------------
Merge variables ONTO large table FROM small table using POINT= dataset option.

The hash object holds only the key values and the observation number of each
FROM row.  On a match the requested variables are read straight from FROM with
SET ... POINT=.  Rows of ONTO without a match get missing values for VARNM and
are also written to MISS<first name in VARNM>.  The variable FROM records the
name of the table that supplied the row's values.
-----------------------------------------------------------------------------*/
(varnm  /* Space delimited list of variable to retrieve */
,onto   /* Dataset to update */
,from   /* Dataset to get values from */
,byvars /* Space delimited list of key variables to match on */
);
%local missds key_vars;

/* Dataset for unmatched rows: MISS + first requested name, capped at 32 chars. */
%let missds=%scan(&varnm,1,%str( ));
%let missds=miss%substr(&missds,1,%sysfunc(min(28,%length(&missds))));

/* Quoted, comma separated key list:  a b  ->  "a","b"                        */
/* DEFINEKEY is documented with comma separated arguments, so the names are  */
/* joined with commas rather than blanks.  A single key is unaffected.       */
%let key_vars=%sysfunc(tranwrd(%sysfunc(compbl(&byvars)),%str( ),%str(",")));
%let key_vars="&key_vars";

data &onto(drop=rc) &missds(drop=rc);
  /* Compile-time only: define all variables in the PDV. */
  if 0 then set &onto &from(keep=&varnm. &byvars.);

  declare hash h_merge ();
  rc = h_merge.DefineKey  (&key_vars);
  rc = h_merge.DefineData ('_point');
  rc = h_merge.DefineDone ();

  /* Load key values -> observation number; only the key variables are read. */
  do _point=1 to _nobs;
    set &from(keep=&byvars) point=_point nobs=_nobs;
    rc = h_merge.add();
  end;

  do until (eof);
    set &onto end = eof;
    /* A successful FIND puts the matching observation number into _POINT. */
    rc = h_merge.find ();
    if rc = 0 then do;
      set &from (keep=&varnm) point=_point;
      from = "&from.";
      output &onto;
    end;
    else do;
      call missing(of &varnm);
      from = "&onto.";
      /* No dataset named: the row goes to both &onto and &missds. */
      output ;
    end;
  end;

stop;
run;

%mend hash_merge_point;

这是一个简单的例子:

/* Lookup table: the values to be merged, keyed by ID. */
data lookup;
  infile datalines;
  input id
        age
        sex $1.
  ;
datalines;
1 10 F
2 20 .
4 30 M
;

/* Master table: ID 3 has no row in LOOKUP. */
data master;
  infile datalines;
  input id
        wt
  ;
datalines;
1 100
2 150
3 180
4 200
;

/* Pull AGE and SEX from LOOKUP onto MASTER, matching rows on ID. */
%hash_merge_point(varnm=age sex
                 ,onto=master
                 ,from=lookup
                 ,byvars=id
                 );

enter image description here

如果目标表中已经存在合并所要创建的那些变量(也就是说,您只是想覆盖它们的当前值),则可以用MODIFY语句代替SET语句,就地修改数据集。不过在尝试之前,最好先确认已经备份了该表。另外请注意,如果您还想用from变量标记数据来源,那么该变量也必须事先存在于目标表中。

因此,使用此更新后的主表:

/* Master table with the merge targets already present: the MODIFY-based   */
/* macro updates the table in place, so AGE, SEX and FROM are created here. */
data master;
  infile datalines;
  input id
        wt
  ;
  length age  8;
  length sex  $1;
  length from $50;
datalines;
1 100
2 150
3 180
4 200
;

此版本的宏:

%macro hash_merge_point
/*-----------------------------------------------------------------------------
Merge variables ONTO large table FROM small table using POINT= dataset option.

In-place variant: ONTO is updated with MODIFY/REPLACE instead of being
rewritten.  The hash object holds only the key values and the observation
number of each FROM row.  On a match the requested variables are read from
FROM with SET ... POINT=.  Rows without a match keep their current values.
The variable FROM is set to the name of the table that supplied the values.
-----------------------------------------------------------------------------*/
(varnm  /* Space delimited list of variable to retrieve */
,onto   /* Dataset to update */
,from   /* Dataset to get values from */
,byvars /* Space delimited list of key variables to match on */
);
%local key_vars;

/* Quoted, comma separated key list:  a b  ->  "a","b"                        */
/* DEFINEKEY is documented with comma separated arguments, so the names are  */
/* joined with commas rather than blanks.  A single key is unaffected.       */
%let key_vars=%sysfunc(tranwrd(%sysfunc(compbl(&byvars)),%str( ),%str(",")));
%let key_vars="&key_vars";

data &onto;
  /* Compile-time only: define the key variables before the hash is set up. */
  if 0 then set &onto (keep=&byvars.);

  declare hash h_merge ();
  rc = h_merge.DefineKey  (&key_vars);
  rc = h_merge.DefineData ('_point');
  rc = h_merge.DefineDone ();

  /* Load key values -> observation number; only the key variables are read. */
  do _point=1 to _nobs;
    set &from(keep=&byvars) point=_point nobs=_nobs;
    rc = h_merge.add();
  end;

  do until (eof);
    modify &onto end = eof;
    /* A successful FIND puts the matching observation number into _POINT. */
    rc = h_merge.find ();
    if rc = 0 then do;
      set &from (keep=&varnm) point=_point;
      from = "&from.";
    end;
    else from = "&onto.";
    /* Write the current row back over itself in &onto. */
    replace;
  end;

stop;
run;

%mend hash_merge_point;

如果运行此代码:

/* Show MASTER as it is before the in-place merge. */
proc print data=master;
  title 'BEFORE';
run;

/* Overwrite AGE and SEX on MASTER from LOOKUP, matching rows on ID. */
%hash_merge_point(varnm=age sex
                 ,onto=master
                 ,from=lookup
                 ,byvars=id
                 );

/* Show MASTER again once the macro has run. */
proc print data=master;
  title 'AFTER';
run;

您得到以下结果:

enter image description here