目标:由 Have 表和 Help 表生成 Want 表。目前的实现(见下文)非常慢。我认为这正好是一个“不该这样使用 SAS 宏”的典型例子,但我想知道:1. 宏方法能否补救/改写得足够快,从而变得可行(例如,proc append 理应加速数据集的堆叠,但我没有看到任何性能提升);2. 各种替代方案会是什么样子。
我已经编写了一个非宏解决方案,我将在下面发布以供比较。
Data:
/* Sample input: one row per (name, term) pair; both columns are 8-byte character. */
data have;
  length name term $ 8;
  infile datalines;
  input name term;
  datalines;
Joe 2000
Joe 2002
Joe 2008
Sally 2001
Sally 2003
;
run;

/* List the dataset that was just created. */
proc print data=have;
run;
/* Lookup table: one row per term value, stored as 8-byte character. */
data help;
  infile datalines;
  input terms :$8.;
  datalines;
2000
2001
2002
2003
2004
2005
2006
2007
2008
;
run;

/* List the dataset that was just created. */
proc print data=help;
run;
/* Expected result: every term between each name's first and last term,
   flagged "here" when the name appears in HAVE for that term, else "gone". */
data want;
  length name term status $ 8;
  infile datalines;
  input name term status;
  datalines;
Joe 2000 here
Joe 2001 gone
Joe 2002 here
Joe 2003 gone
Joe 2004 gone
Joe 2005 gone
Joe 2006 gone
Joe 2007 gone
Joe 2008 here
Sally 2001 here
Sally 2002 gone
Sally 2003 here
;
run;

/* Prints HAVE (not WANT), exactly as the original step does. */
proc print data=have;
run;
我可以写一个小宏,针对单个名字生成其 here/gone 状态表:
/* RET(NAME): builds HEREGONE_<NAME> for one name.
   Side effects: creates tables STUDTERMLIST and TERMLIST, and sets the
   macro variables MAXTERM and MINTERM.
   Output columns: terms, Name, status ('Here' when the name has a HAVE row
   for that term, 'Gone' when it does not). */
%macro ret(name);
  proc sql;
    /* Terms in which this name appears. */
    create table studtermlist as
      select distinct term
        from have
       where name = "&name";

    /* Last term for this name. */
    select max(term)
      into :MAXTERM
      from have
     where name = "&name";

    /* First term for this name. */
    select min(term)
      into :MINTERM
      from have
     where name = "&name";

    /* All HELP terms inside the name's first..last range. */
    create table termlist as
      select TERMS
        from help
       where TERMS >= "&MINTERM." and TERMS <= "&MAXTERM."
       order by TERMS;

    /* Flag each term in the range according to whether it matched. */
    create table HEREGONE_&name as
      select A.terms,
             "&name" as Name,
             case
               when a.terms = b.term then 'Here'
               when b.term is null then 'Gone'
             end as status
        from termlist as a
        left join studtermlist as b
          on a.terms = b.term;
  quit;
%mend ret;
/* Build the per-name tables for the two sample names, then list each one. */
%RET(Joe)
%RET(Sally)

proc print data=HEREGONE_Joe;
run;

proc print data=HEREGONE_Sally;
run;
但这还不完整。如果我对所有名字(可能相当多)循环调用这个宏……
/* Run %RET for every distinct name in HAVE and stack the per-name results
   into BASE.
   Fixes relative to the original text:
     - "&currentvalue" had been mis-encoded as "¤tvalue" (the "&curren" HTML
       entity), so none of the references resolved.
     - %SCAN now splits on blanks only, matching the blank-separated list
       built below; the default delimiters would also split names on
       characters such as "-" or ".". */

/* Collect the distinct names into one blank-separated macro variable. */
proc sql noprint;
  select distinct name into :namelist separated by ' '
  from have
  ;
quit;

/* SQLOBS is the number of rows returned by the query above. */
%let n=&sqlobs ;

%MACRO RETYA ;
  /* Silence log notes while looping; turned back on after the loop. */
  OPTIONS NONOTEs ;
  %do i = 1 %to &n ;
    %let currentvalue = %scan(&namelist,&i,%str( ));
    %put &currentvalue ;
    %put &i ;
    /* Creates HEREGONE_<name> for the current name. */
    %RET(&currentvalue);
    /* The first name seeds BASE. */
    %IF &i = 1 %then %do ;
      data base; set HEREGONE_&currentvalue; run;
    %end;
    /* Later names are unioned onto BASE, then their per-name table is dropped. */
    %IF &i gt 1 %then %do ;
      proc sql ;
        create table base as
        select * from base
        union
        select * from HEREGONE_&currentvalue
        ;
        drop table HEREGONE_&currentvalue;
      quit;
    %end;
  %end ;
  OPTIONS NOTES;
%MEND;

%RETYA ;

proc sort data=base ; by name terms; run;
proc print data=base; run;
这样我就得到了想要的结果,但有 6000 个名字时,需要 20 多分钟。
答案 0 :(得分:2)
我们来试试另一种解法。先用 PROC SQL 求出每个名字的最小/最大学期(term);然后用一个数据步为每个名字生成其完整的学期区间表,再与原始表做连接。
/* Sample data: term is read as a numeric value in this version. */
data have;
  infile datalines;
  input name :$8. term;
  datalines;
Joe 2000
Joe 2002
Joe 2008
Sally 2001
Sally 2003
;
run;

/* First and last term for each name. */
proc sql;
  create table terms as
    select name,
           min(term) as term_min,
           max(term) as term_max
      from have
     group by name
     order by name;
quit;

/* Expand each name into one row per term from term_min to term_max. */
data empty (drop=term_min term_max);
  set terms;
  do term = term_min to term_max by 1;
    output;
  end;
run;

/* Left-join the expanded grid back to HAVE: rows with no match are 'Gone',
   matched rows are 'Here'. */
proc sql;
  create table want as
    select grid.name,
           grid.term,
           case
             when seen.term is null then 'Gone'
             else 'Here'
           end as status
      from empty as grid
      left join have as seen
        on  grid.name = seen.name
        and grid.term = seen.term
     order by grid.name, grid.term;
quit;
编辑:再看你的宏方案,部分问题在于你对表的扫描次数太多。
与其在循环中逐个追加数据,不如利用统一的命名约定,最后用一个数据步一次性拼接所有输出。
/* RET(NAME): builds _HG_<NAME> for one name.
   Sets macro variables MINTERM and MAXTERM from HAVE, then left-joins HELP
   to that name's HAVE rows, keeping only terms between MINTERM and MAXTERM.
   Output columns: terms, Name, status ('Here' on a match, 'Gone' otherwise). */
%macro ret(name);
  proc sql noprint;
    /* First and last term for this name, fetched in a single pass. */
    select min(term), max(term)
      into :MINTERM, :MAXTERM
      from have
     where name = "&name";

    create table _HG_&name as
      select A.terms,
             "&name" as Name,
             case
               when a.terms = b.term then 'Here'
               when b.term is null then 'Gone'
             end as status
        from help as a
        left join have as b
          on  a.terms = b.term
          and b.name = "&name"
       where a.terms between "&minterm" and "&maxterm";
  quit;
%mend ret;
/* Call %RET once per distinct name, then stack every _HG_* table into ALL. */

/* Sort so that BY-group processing can find the first row of each name. */
proc sort data=have;
  by name term;
run;

/* Queue one %RET call per name.
   The call is wrapped in %NRSTR so the macro is not executed while this
   DATA step is still running. Without it, CALL EXECUTE runs the macro
   immediately and resolves &minterm/&maxterm before the generated
   SELECT ... INTO has had a chance to set them. */
data _null_;
  set have;
  by name;
  if first.name then do;
    str=catt('%nrstr(%ret(', name, '));');
    call execute(str);
  end;
run;

/* Append results: the colon picks up every dataset whose name starts with _hg. */
data all;
  set _hg:;
run;
答案 1 :(得分:1)
实际上,你可以用一条嵌套的 SQL 查询完成这件事,但那样会很混乱、难以阅读。
我打算将它分解为三个组成部分。
首先,获取不同的名称;
/* Step 1: one row per distinct name found in HAVE. */
proc sql noprint;
  create table names as
    select distinct name
      from have;
quit;
第二,对名字和学期(term)做笛卡尔积,得到所有组合。
/* Step 2: pair every name with every HELP term (full cross product). */
proc sql noprint;
  create table temp as
    select a.name,
           b.terms as term
      from names as a
     cross join help as b;
quit;
第三,左连接找到匹配
/* Step 3: left-join the name/term grid to HAVE; unmatched rows are "gone",
   matched rows are "here". */
proc sql noprint;
  create table want as
    select grid.name,
           grid.term,
           case
             when seen.term is null then "gone"
             else "here"
           end as Status
      from temp as grid
      left join have as seen
        on  grid.name = seen.name
        and grid.term = seen.term;
quit;
最后,删除临时表以节省空间;
/* Remove the intermediate TEMP table from WORK; NOLIST suppresses the
   directory listing in the log. */
proc datasets library=work nolist;
  delete temp;
  run;
quit;
正如Reeza所示,还有其他方法可以做到这一点。如上所述,您可以将所有这些合并到一个SQL连接中,并获得所需的结果。根据计算机内存和数据大小,它应该没问题(因为一切都在内存中可能会更快)。
答案 2 :(得分:1)
/* Build WANT in a single query: cross every distinct name in HAVE with every
   HELP term, then left-join back to HAVE to flag each pair as Here or Gone.
   Output columns: name, terms, term (missing when there is no match), status.
   The original snippet never closed the step; the trailing QUIT ends PROC SQL
   so it does not stay active for whatever code follows. */
proc sql;
create table want as
select c.name, c.terms, a.term,
( case when missing(a.term) then "Gone"
else "Here" end ) as status
from (select distinct a.name, b.terms
from have a, help b) c
left join have a
on c.terms = a.term and c.name = a.name
order by c.name, c.terms, a.term
;
quit;
答案 3 :(得分:0)
我也贴一个类似的答案,方便之后对各个方案进行比较。
/* Set-based version of the per-name macro: four tables built in one PROC SQL step. */
proc sql;
  /* Distinct (term, name) pairs present in HAVE. */
  create table studtermlist as
    select distinct term, name
      from have;

  /* Last and first term for each name. */
  create table MAXMINTERM as
    select max(term) as MAXTERM,
           min(term) as MINTERM,
           name
      from have
     group by name;

  /* Every HELP term that falls inside a name's MINTERM..MAXTERM range. */
  create table TERMLIST as
    select TERMS, name
      from help as a
     inner join MAXMINTERM as b
        on a.terms >= b.MINTERM and a.terms <= b.MAXTERM
     order by name, TERMS;

  /* Flag each (name, term) in range by whether it matched a HAVE row. */
  create table HEREGONE as
    select a.terms,
           a.Name,
           case
             when a.terms = b.term then 'Here'
             when b.term is null then 'Gone'
           end as status
      from termlist as a
      left join studtermlist as b
        on  a.terms = b.term
        and a.name = b.name
     order by name, terms;
quit;