我知道标题很难理解,我很抱歉。找不到更好的方法来解释我的问题。
我有这样的事情:
/* Sample input: one row per POLID / PRODID / TOSNUM / PREMID combination.
   X_INSURER_SERIAL_NO is read as missing on the rows that supply a period. */
data HAVE;
    input POLID PRODID TOSNUM PREMID X_INSURER_SERIAL_NO;
    datalines;
13102100206 310 0 20 .
13102100207 310 0 20 .
13102100207 310 0 21 .
13102100207 310 1 1 .
13102100207 310 1 2 .
13102100207 310 1 3 .
13102100207 310 1 20 .
13102100207 310 1 21 .
13102100207 310 2 1 .
13102100207 310 2 2 .
13102100207 310 2 20 .
13102100207 310 2 21 .
13102100207 310 3 1 .
13102100207 310 3 2 .
13102100207 310 3 9 .
13102100207 310 3 20 .
13102100207 310 3 21 .
13102100209 310 500 20 5
13102100210 310 100 20 1
13102100210 310 101 21 1
13102100210 310 100 1 1
13102100210 310 101 2 1
13102100210 310 101 3 1
13102100211 310 100 20 1
13102100211 310 101 21 1
13102100211 310 200 1 2
13102100211 310 201 2 2
13102100211 310 201 3 2
;
run;
我需要做的是,
对于每个polid + prodid,如果某个premid在前一个tosnum中存在、但在当前tosnum中缺失,则需要把这个premid补上。
例如, polid = 13102100207,prodid = 310。
当tosnum = 1时,我有5种不同的premid(1,2,3,20,21)。 当tosnum = 2时,我有4种不同的premid(1,2,20,21)。 premid = 3缺失。
我需要添加这个样本,并在每次添加样本时在fictive_ind列中添加1。
例如,polid = 13102100206 只有一行,而且,计算中不包括,因为它没有以前的tosnum。
当X_INSURER_SERIAL_NO没有丢失时,它应该替换tosnum。
所以,例如,polid = 13102100210也应该从计算中排除,因为它没有任何其他先前的值(x_insurer_serial_no总是等于1)。
这就是我所做的:
首先,当x_insurer_serial_no没有丢失时替换tosnum:
/* AAA: the distinct (POLID, PRODID, level) combinations found in HAVE, where
   the level is X_INSURER_SERIAL_NO when it is populated and TOSNUM otherwise. */
proc sql;
    create table AAA as
    select distinct
        POLID,
        PRODID,
        case
            when X_INSURER_SERIAL_NO ne . then X_INSURER_SERIAL_NO
            else TOSNUM
        end as X_INSURER_SERIAL_NO
    from HAVE;
quit;
然后,
我使用下面的代码,找出哪些polid应该包含在计算中、哪些应该被排除:
/* BBB: row count of AAA per (POLID, PRODID), stored in the column COUNT. */
proc sql;
    create table BBB as
    select
        POLID,
        PRODID,
        count(*) as COUNT
    from AAA
    group by
        POLID,
        PRODID;
quit;
我只接受count> 1的计算行。
/* ABC: the rows of HAVE whose (POLID, PRODID) pair has COUNT > 1 in BBB.
   The pair is matched column by column in a parenthesised, correlated
   subquery: PROC SQL expects a subquery to be enclosed in parentheses, and
   comparing a CATS(POLID, PRODID) string could match two different pairs
   whose digits happen to concatenate to the same text. */
proc sql;
    create table ABC as
    select h.*
    from HAVE as h
    where exists
        (
            select *
            from BBB as b
            where b.POLID = h.POLID
              and b.PRODID = h.PRODID
              and b.COUNT > 1
        );
quit;
现在我找到循环的最小值和最大值:
/* Store the smallest TOSNUM in ABC, the largest TOSNUM, and their difference
   in the macro variables min_TOSNUM, max_TOSNUM and diff. */
proc sql;
    select
        min(TOSNUM),
        max(TOSNUM),
        max(TOSNUM) - min(TOSNUM)
    into
        :min_TOSNUM,
        :max_TOSNUM,
        :diff
    from abc;
quit;
然后循环:
/*
 * fictive_premid
 *
 * Builds the table FICTIVE_PREMID from ABC. The result holds every ABC row
 * with FICTIVE_IND = 0 and, for each TOSNUM level after the first, one row
 * with FICTIVE_IND = 1 for every (POLID, PRODID, PREMID) that is present at
 * the previous level of the result but absent from ABC at the current level.
 *
 * Parameters:
 *   min_TOSNUM - lowest TOSNUM in ABC; seeds the result and starts the loop.
 *   diff       - max(TOSNUM) - min(TOSNUM) in ABC, i.e. the number of steps.
 *
 * Known limitation: the loop walks the global TOSNUM range, so fictive rows
 * keep being carried forward for a policy after that policy's own last
 * TOSNUM.
 */
%macro fictive_premid(min_TOSNUM =, diff =);
    %local i next;

    /* Seed the result with the real rows of the first level. */
    proc sql;
        create table fictive_premid as
        select POLID, PRODID, TOSNUM, PREMID, 0 as FICTIVE_IND
        from abc
        where TOSNUM = &min_TOSNUM;
    quit;

    /* The last level is min_TOSNUM + diff, so the final step starts one
       level below it; using diff alone is only right when min_TOSNUM is 0. */
    %do i = &min_TOSNUM %to %eval(&min_TOSNUM + &diff - 1);
        %let next = %eval(&i + 1);

        proc sql;
            create table fictive_premid_t as
            select * from
            (
                /* PREMIDs known at level &i but missing at level &next. */
                select polid, prodid, &next as TOSNUM, premid, 1 as FICTIVE_IND
                from fictive_premid
                where TOSNUM = &i
                except
                select polid, prodid, &next as TOSNUM, premid, 1 as FICTIVE_IND
                from abc
                where TOSNUM = &next
            )
            union
            /* The real rows of level &next. */
            select polid, prodid, &next as TOSNUM, premid, 0 as FICTIVE_IND
            from abc
            where TOSNUM = &next;
        quit;

        proc append base=fictive_premid data=fictive_premid_t; run;
    %end;

    /* A BY statement is only valid inside a step; sort the finished table. */
    proc sort data=fictive_premid;
        by polid prodid TOSNUM premid;
    run;
%mend;
%fictive_premid (min_TOSNUM = &min_TOSNUM, diff = &diff );
现在,
我的问题是,例如,当&i的值为4且polid = 13102100207时....
该polid的最后一个tosnum等于3,但我得到了6个tosnum = 4、fictive_ind = 1的新行。 当然,这种情况会一直持续到&i达到&diff的值。
请问,
有没有办法在到达最后一个tosnum或x_insurer_serial_no时停止迭代?
非常感谢您的帮助。
希望我已经把我的需求解释清楚了。
答案 0 :(得分:1)
如果我正确理解了你的问题,我认为下面的代码可以满足你的需求。它依赖于TOSNUM值是连续递增的(即1,2,3或4,5,6),中间没有跳号。
/* Replace TOSNUM with X_INSURER_SERIAL_NO wherever the latter is populated.
   HAVE is rewritten in place. */
data have;
    set have;
    tosnum = coalesce(X_INSURER_SERIAL_NO, TOSNUM);
run;

/* Order HAVE by policy, product, level and premium id. */
proc sort data=have;
    by POLID PRODID TOSNUM PREMID;
run;
proc sql noprint;
    /* dist: the distinct (polid, prodid, tosnum) combinations in HAVE. */
    create view dist as
    select distinct
        polid,
        prodid,
        tosnum
    from have;

    /* toDelete: (polid, prodid) pairs that have exactly one tosnum level.
       The test is made per polid + prodid, the same key lastTosnum is
       grouped by below, so a policy with several products is judged one
       product at a time. */
    create table toDelete as
    select
        polid,
        prodid,
        count(*) as count
    from dist
    group by
        polid,
        prodid
    having count(*) = 1;

    /* Remove the single-level pairs from HAVE. */
    delete from have as h
    where exists
        (
            select *
            from toDelete as td
            where td.polid = h.polid
              and td.prodid = h.prodid
        );

    /* lastTosnum: one past the highest remaining tosnum of each pair. */
    create table lastTosnum as
    select
        POLID,
        PRODID,
        max(TOSNUM) + 1 as Tosnum
    from have
    group by
        POLID,
        PRODID;
quit;
/* nextVals: a copy of HAVE without X_INSURER_SERIAL_NO, with every row moved
   up one TOSNUM level. */
data nextVals;
    set have;
    drop X_INSURER_SERIAL_NO;
    TOSNUM = 1 + tosnum;
run;
/* Drop the nextVals rows whose tosnum equals the Tosnum recorded in
   lastTosnum for the same polid and prodid. */
proc sql noprint;
    delete from nextVals as nv
    where exists
        (
            select *
            from lastTosnum as lts
            where lts.polid = nv.polid
              and lts.prodid = nv.prodid
              and lts.tosnum = nv.tosnum
        );
quit;
/* want: HAVE combined with nextVals on POLID, PRODID, TOSNUM and PREMID.
   A row supplied only by nextVals gets fictive_ind = 1; every row that is
   present in HAVE gets fictive_ind = 0.
   TOSNUM is left exactly as merged: HAVE rows already carry their own level
   and nextVals rows already carry the level they are missing from, so
   subtracting 1 afterwards would move every output row one level down. */
data want;
    merge have (in=h) nextVals (in=next);
    by POLID PRODID TOSNUM PREMID;
    fictive_ind = (next and not h);
run;