SAS:在另一个数据集中像数组一样使用查找数据集

时间:2014-02-15 19:52:40

标签: dataset sas lookup

我有一个数据集(contents),其中包含学校课程内容的说明:

num       description
content1  math
content2  spanish
content3  geography
content4  chemistry
content5  history

在另一个数据集(students)中,我有变量 content1-content5(作为数组使用),并用标记 1 表示每个学生所学的内容。

name age content1 content2 content3 content4 content5 
BOB  15     1        1        1                 1   
BRYA 16
CARL 15              1                          1  
SUE  17                       1        1        1
LOU  15                                         1 

如果我使用这样的代码:

    /* Build allcontents: a comma-separated list of the names of the
       content variables that are flagged with 1 for each student. */
    data students1;
        set students;
        array content[5];            /* refers to content1-content5 */
        format allcontents $100.;
        do i=1 to dim(content);
            /* CATS strips leading/trailing blanks from every argument;
               CAT keeps the blank padding, so the combined text does not
               fit the $100 variable as intended. Appending (instead of
               prepending) keeps the content1..content5 order. */
            if content[i]=1 then allcontents=cats(allcontents,vname(content[i]),',');
        end;
    run;

结果是:

name age content1 content2 content3 content4 content5 allcontents 
BOB  15     1        1        1                 1     content1,content2,content3,content5,
BRYA 16
CARL 15              1                          1     content2,content5,
SUE  17                       1        1        1     content3,content4,content5,
LOU  15                                         1     content5,

1)我想利用查找表(数据集 contents)中的描述,让变量 allcontents 里出现的是内容的名称(例如 math),而不是数组变量名 content1-content5。我该怎么做?

2)之后,我希望结果按内容描述逐行列出,而不是按学生逐行列出,例如:

description  name age
math         BOB  15
spanish      BOB  15
geography    BOB  15
history      BOB  15
spanish      CARL 15
history      CARL 15
geography    SUE  17
chemistry    SUE  17 
history      SUE  17
history      LOU  15 

有可能吗?

感谢。

2 个答案:

答案 0 :(得分:2)

首先,从这篇帖子(this post)中获取 %create_hash() 宏。

使用哈希表查找值。

/* Look up the description of every flagged content variable through a
   hash object and write one output row per (student, content) pair. */
data students1;
    set students;   /* the statement terminator was missing here */
    array content[5];
    format num $32. description $16.;
    /* Create the hash only once, on the first iteration of the step.
       NOTE(review): the create_hash macro is defined outside this snippet;
       presumably it declares hash "cnt" keyed by num with data item
       description, loaded from data set contents - confirm. */
    if _n_ = 1 then do;
       %create_hash(cnt,num,description,"contents");
    end;
    do i=1 to dim(content);
       if content[i]=1 then do;
          num = vname(content[i]);      /* lookup key, e.g. content1 */
          /* reset so a failed lookup does not reuse the previous value */
          call missing(description);
          rc = cnt.find();
          output;
       end;
    end;
    keep description name age;
run;

答案 1 :(得分:1)

我发现 proc transpose 很合适。对于问题 2),转置一次就足够了;要重命名变量 content1-content5(即问题 1),则需要转置两次。关键是 proc transpose 中的 ID 语句,它会根据 ID 变量的取值自动为转置后的变量命名。

下面的代码应该为您提供所需的答案(尽管名称按字母顺序排列,可能与您的原始排序不同)。

/* original data sets */
/* Lookup table: content variable name (num) -> description. */
data names;
    /* List input with a bare $ creates 8-byte character variables, which
       would cut "geography" and "chemistry" to 8 characters; declare the
       lengths before INPUT. */
    length num $32 description $16;
    input num $ description $;
    cards;
content1 math
content2 spanish
content3 geography
content4 chemistry
content5 history
;run;

/* Sample students: one row per student; content1-content5 hold 1 when
   the content is flagged and missing (.) otherwise. */
data students;
    input name $ age content1-content5;
    datalines;
BOB  15 1 1 1 . 1
BRYA 16 . . . . .
CARL 15 . 1 . . 1
SUE  17 . . 1 1 1
LOU  15 . . . . 1
;
run;

/* transpose */
/* BY-group processing below needs the rows ordered by name and age. */
proc sort data=students
          out=tmp_sorted;
    by name age;
run;

/* No VAR statement: every numeric variable that is not a BY variable is
   turned into a row. NAME= and PREFIX= only spell out the defaults, so
   the output columns are still _NAME_ and COL1. */
proc transpose data=tmp_sorted
               out=tmp_transposed
               name=_NAME_
               prefix=COL;
    by name age;
run;

/* merge the names of content1-5 */
/* Attach the description to every transposed row. The num column is kept
   so the original content1-content5 order stays available instead of the
   alphabetical order of the description column; drop it later when it is
   no longer needed. */
proc sql;
    create table tmp_merged as
    select B.description, A.name, A.age, B.num, A.COL1
    from tmp_transposed as A
    left join names as B
        on A._NAME_=B.num
    /* age belongs in the sort key: the following PROC TRANSPOSE runs
       BY name age and needs its input ordered by both variables */
    order by A.name, A.age, B.num;
quit;

/* transpose again */
/* Turn the rows back into columns, one row per student. The ID statement
   names each new column after the value of description. */
proc transpose data=tmp_merged(drop=num)
               out=tmp_renamed(drop=_name_);
    id description;
    by name age;
run;

/* answer (1) */
/* Build allcontents: a comma-separated list of the descriptions whose
   column holds 1 for the student. */
data ans1;
    set tmp_renamed;
    /* [*] sizes the array from the math--history name range */
    array content[*] math--history;
    format allcontents $100.;
    do i=1 to dim(content);
        /* CATX strips blanks and skips blank arguments, so the first item
           gets no leading comma and no SUBSTR clean-up is required */
        if content[i]=1 then allcontents=catx(',',allcontents,vname(content[i]));
    end;
run;

/* answer (2) */
/* Keep only the flagged (student, content) rows; the helper columns num
   and col1 are left out of the result. */
data ans2;
    set tmp_merged(where=(col1=1));
    drop num col1;
run;

/* cleanup: delete the WORK members whose names start with tmp_ */
proc datasets library=work nolist;
    delete tmp_: ;
run;
quit;