SAS哈希合并宏 - 输出多个数据集

时间:2016-08-11 20:41:46

标签: hash merge sas

我正在创建一个哈希合并宏,想为合并后的表创建一个数据集,并为缺失的值另外创建数据集。下面是用常规 merge 写出的、我想要实现的效果的例子。

/* Reference implementation using a sort-merge: joins &onto with fr2 by
   &byvars and routes each row to one of three outputs:
     &onto       - every row that exists in &onto (matched or not)
     miss&varnm  - rows of &onto that have no partner in fr2
     xtra&varnm  - rows of fr2 that have no partner in &onto
   Relies on the macro variables &onto, &from, &varnm and &byvars being
   set by the caller, and on both inputs being sorted by &byvars.
   NOTE(review): the second input is the literal data set fr2, not &from -
   presumably a prepared copy of &from built elsewhere; confirm. */
data &onto miss&varnm xtra&varnm;
  merge &onto(in=in1) fr2(in=in2);
    by &byvars;
    /* A character variable takes its length from the first assignment the
       compiler sees. Without this LENGTH statement FROM would be only as
       long as "&from", truncating "&onto" whenever that name is longer. */
    length from $ %sysfunc(max(%length(&from), %length(&onto)));
    /* Label each row with the source that supplied the lookup side. */
    if in2 then from = "&from";
    else from = "&onto";
    if in1 and in2 then output &onto;
    else if in1 and not in2 then output &onto miss&varnm;
    else if not in1 and in2 then output xtra&varnm;
run;

我认为我的合并运行良好,但不确定如何得到“缺失”数据集,希望能得到帮助。

%macro hashmerge(varnm,onto,from,byvars);
/* Hash-based left join: looks up &varnm from &from and adds them to every
   row of &onto, replacing &onto in place.

   Parameters
     varnm  - blank-separated variable(s) to bring over from &from
     onto   - data set to merge onto (read, then overwritten)
     from   - data set supplying the lookup values (loaded into a hash)
     byvars - blank-separated key variable(s) shared by both data sets

   Rows of &onto without a match are still written, with &varnm set to
   missing. No sorting of either input is required. */

/* Keep the helper macro variables out of the caller's scope. */
%local data_vars data_vars_a data_vars_b data_key;

/* %cmpres trims the lists and collapses repeated blanks, so the blank ->
   separator translation below cannot produce empty names. */
%let data_vars   = %cmpres(&varnm);
%let data_vars_a = %sysfunc(tranwrd(&data_vars.,%str( ),%str(",")));
%let data_vars_b = %sysfunc(tranwrd(&data_vars.,%str( ),%str(,)));
%let data_key    = %cmpres(&byvars);
%let data_key    = %sysfunc(tranwrd(&data_key.,%str( ),%str(",")));

data &onto(drop=rc);
 /* Compile-time only: puts the variables of both inputs into the PDV so
    the hash object can bind to them, without reading any row. */
 if 0 then set &onto &from(keep=&varnm &byvars);

 declare hash h_merge (dataset: "&from.");

 rc = h_merge.DefineKey  ("&data_key.");
 rc = h_merge.DefineData ("&data_vars_a.");
 rc = h_merge.DefineDone ();

 do until (eof);
   set &onto end = eof;
   /* Clear the lookup variables first: they keep their values across
      loop passes, so a failed find would otherwise leave the previous
      match in place. This also discards any same-named values that came
      from &onto itself. */
   call missing(&data_vars_b.);
   rc = h_merge.find ();
   output;
 end;
stop;
run;

%mend;

3 个答案:

答案 0 :(得分:0)

按照您目前设置哈希对象的方式,无法轻松得到 xtra 数据集。不过,miss 和 onto 数据集可以用类似下面的写法毫不费力地得到,只需对 data 语句做必要的修改:

/* Route the current row by the lookup return code in rc (presumably the
   result of h_merge.find() in the surrounding step): any non-zero code
   sends the row to the miss data set, a zero code to &onto. */
if rc ne 0 then output miss&varnm;
else output &onto;

但是,要找出 from 数据集中尚未被用到、通常应进入 xtra 数据集的所有值,您需要跟踪哪些值已经在合并中使用过,然后在数据步结束时把其余的值复制到一个新的哈希对象中,并输出该对象。

注意:使用当前代码时,所有行都会输出到 &onto 数据集,即使它们的查找变量值缺失。

答案 1 :(得分:0)

这样做有点难,但在这里你可以了解如何做到这一点。

创建另一个哈希对象,与第一个完全相同。对于已经匹配到的项目,从第二个对象中将其删除。最后,只需把第二个对象输出为数据集即可。这样做会增加哈希对象占用的内存,但可以作为初步的尝试。

(没有运行代码并抱歉任何错误)

%macro hashmerge(varnm,onto,from,byvars);
/* Hash-based left join of &from onto &onto that also reports both kinds
   of non-matches.

   Parameters
     varnm  - blank-separated variable(s) to bring over from &from
     onto   - data set to merge onto (read, then overwritten)
     from   - data set supplying the lookup values
     byvars - blank-separated key variable(s) shared by both data sets

   Outputs (suffix = &varnm with blanks turned into underscores)
     &onto        - every row of &onto, lookup variables filled if matched
     miss<suffix> - rows of &onto whose key is not present in &from
     xtra<suffix> - keys (with their data) of &from never matched by &onto

   Two hash objects are loaded from &from: one for lookups and one that
   starts as a full copy and has every matched key removed, so whatever
   is left at the end are the extras. */

/* Keep the helper macro variables out of the caller's scope. */
%local data_vars data_vars_a data_vars_b data_key sfx;

/* %cmpres trims the lists and collapses repeated blanks, so the blank ->
   separator translation below cannot produce empty names. */
%let data_vars   = %cmpres(&varnm);
%let data_vars_a = %sysfunc(tranwrd(&data_vars.,%str( ),%str(",")));
%let data_vars_b = %sysfunc(tranwrd(&data_vars.,%str( ),%str(,)));
%let data_key    = %cmpres(&byvars);
%let data_key    = %sysfunc(tranwrd(&data_key.,%str( ),%str(",")));

/* One valid name suffix even when several variables are requested;
   identical to &varnm when a single variable is passed. */
%let sfx = %sysfunc(translate(&data_vars.,_,%str( )));

/* Every data set named in an OUTPUT statement must be listed here. */
data &onto(drop=rc) miss&sfx.(drop=rc);
   /* Compile-time only: defines the PDV variables the hashes bind to. */
   if 0 then set &onto &from(keep=&varnm. &byvars.);

   declare hash h_merge    (dataset: "&from.");
   rc = h_merge.DefineKey  ("&data_key.");
   rc = h_merge.DefineData ("&data_vars_a.");
   rc = h_merge.DefineDone ();

   /* Copy of the lookup table holding the keys not yet found. The keys
      are stored as data too, because the OUTPUT method writes only data
      items and the extras would otherwise have no key columns. */
   declare hash h_merge_copy    (dataset: "&from.");
   rc = h_merge_copy.DefineKey  ("&data_key.");
   rc = h_merge_copy.DefineData ("&data_key.", "&data_vars_a.");
   rc = h_merge_copy.DefineDone ();

   do until (eof);

      set &onto. end = eof;

      /* Lookup variables keep their values across loop passes; clear
         them so an unmatched row does not inherit the previous match. */
      call missing(&data_vars_b.);

      if h_merge.find () = 0 then do;
         /* Matched: drop the key from the leftovers unless an earlier
            row with the same key already did. */
         if h_merge_copy.check() = 0 then do;
            rc = h_merge_copy.remove();
         end;
      end;
      else
         output miss&sfx.; /* no match - also record the row as missing */

      output &onto.; /* matched or not, the row stays in &onto (left join) */
   end;

   /* Whatever is left was never requested by &onto. It must go to its own
      data set: &onto is still open as an output of this step. */
   rc = h_merge_copy.output(dataset: "xtra&sfx.");

   stop;
run;

%mend;

这段代码还可以改进以减少内存消耗,例如不在第二个哈希中存储数据,但今天已经太晚了。希望这能有所帮助。

答案 2 :(得分:0)

感谢大家的帮助和建议。我不确定如何在不创建另一个哈希的情况下添加 'xtra' 数据集,而创建另一个哈希又违背了哈希合并的初衷。所以,我只需要创建缺失数据集和合并后的数据集。

%macro hashmerge(varnm,onto,from,byvars);
/* Hash-based left join of &from onto &onto that also collects the rows
   without a match.

   Parameters
     varnm  - blank-separated variable(s) to bring over from &from
     onto   - data set to merge onto (read, then overwritten)
     from   - data set supplying the lookup values (loaded into a hash)
     byvars - blank-separated key variable(s) shared by both data sets

   Outputs (suffix = &varnm with blanks turned into underscores)
     &onto        - every row of &onto, lookup variables filled if matched
     miss<suffix> - rows of &onto whose key is not present in &from

   Each output row carries FROM: "&from" when the lookup succeeded,
   "&onto" when it did not. */

/* Keep the helper macro variables out of the caller's scope. */
%local data_vars data_vars_a data_vars_b data_key sfx from_len;

/* %cmpres trims the lists and collapses repeated blanks, so the blank ->
   separator translation below cannot produce empty names. */
%let data_vars   = %cmpres(&varnm);
%let data_vars_a = %sysfunc(tranwrd(&data_vars.,%str( ),%str(",")));
%let data_vars_b = %sysfunc(tranwrd(&data_vars.,%str( ),%str(,)));
%let data_key    = %cmpres(&byvars);
%let data_key    = %sysfunc(tranwrd(&data_key.,%str( ),%str(",")));

/* One valid name suffix even when several variables are requested;
   identical to &varnm when a single variable is passed. */
%let sfx = %sysfunc(translate(&data_vars.,_,%str( )));

/* FROM has to hold the longer of the two data set names. */
%let from_len = %sysfunc(max(%length(&from), %length(&onto)));

data &onto(drop=rc) miss&sfx.(drop=rc);
   /* Compile-time only: defines the PDV variables the hash binds to. */
   if 0 then set &onto &from(keep=&varnm. &byvars.);

   /* Without an explicit length FROM would be sized by the first
      assignment in the step and truncate the other name. */
   length from $ &from_len;

   declare hash h_merge (dataset: "&from.");
   rc = h_merge.DefineKey  ("&data_key.");
   rc = h_merge.DefineData ("&data_vars_a.");
   rc = h_merge.DefineDone ();

   do until (eof);
     set &onto end = eof;
     /* Clear the lookup variables so an unmatched row does not inherit
        the values of the previous match. */
     call missing(&data_vars_b.);
     rc = h_merge.find ();
     /* FROM must be set before OUTPUT: a value assigned afterwards would
        only show up on the next row written. */
     if rc = 0 then do;
       from = "&from.";
       output &onto;
     end;
     else do;
       from = "&onto.";
       output miss&sfx. &onto;
     end;
   end;

stop;
run;

%mend;

如果您只希望新数据集中包含新出现的缺失值,或者想去掉 'from' 变量,修改这段代码很容易。

如果有人知道更有效的方法,请发表建议。