我想创建一个表,显示每位患者(pat_seqno)最常见的 NDC 编号(ndc_seqno)。目前我的 having 子句不起作用;下面是我最近的一次尝试,它会报错,提示在参与查询的表(contributing tables)中找不到 ndc_count。感谢您的帮助。
proc sql;
/* Asker's attempt (does not run): intended to keep, for each pat_seqno,
   the ndc_seqno that occurs most often in d1. */
create table unique_rx_count as
/* NOTE(review): ndc_count is only an alias inside the subquery below; the
   outer query reads from d1 alone, so ndc_count cannot be resolved here.
   This matches the "ndc_count not found" error described in the question. */
select pat_seqno , ndc_seqno, ndc_count
from d1
/* NOTE(review): the subquery returns one row count per pat_seqno, so this
   predicate compares ndc_seqno values against counts rather than against
   other ndc_seqno values. */
where ndc_seqno in
(select count(ndc_seqno) as ndc_count
from d1
group by pat_seqno)
/* Grouping by pat_seqno only, while ndc_seqno is selected ungrouped. */
group by pat_seqno
having ndc_count = max(ndc_count)
;
quit;
例如,输入数据:
Pat_seqno ndc_seqno
2 45
2 45
2 23
2 45
16 10
16 10
16 78
预期返回:
Pat_seqno ndc_seqno
2 45
16 10
答案 0 :(得分:1)
这是一个简单粗暴的解决方案,但确实有效。我先把它拆成三条独立的 SQL 语句:
* Tally how many rows each patient/NDC pair has in d1;
proc sql;
    create table step1 as
    select
        pat_seqno,
        ndc_seqno,
        count(*) as ndc_count
    from d1
    group by
        pat_seqno,
        ndc_seqno;
quit;
* Largest per-NDC row count found for each patient in step1;
proc sql;
    create table step2 as
    select
        pat_seqno,
        max(ndc_count) as ndc_count
    from step1
    group by pat_seqno;
quit;
* Keep the step1 rows whose count equals the maximum for that patient in step2;
proc sql;
    /* When several NDCs tie for the maximum, each tied row is kept. */
    create table want as
    select counts.*
    from step1 as counts
    inner join step2 as peak
        on  counts.pat_seqno = peak.pat_seqno
        and counts.ndc_count = peak.ndc_count;
quit;
如果您愿意,也可以把它们合并成一条 SQL 语句:
proc sql;
    /* Single-statement form of the three-step approach:
         counts - number of d1 rows per patient/NDC pair
         peak   - largest of those counts for each patient
       Rows of counts are kept when their count equals the patient maximum. */
    create table want as
    select counts.*
    from (
            select pat_seqno, ndc_seqno, count(*) as ndc_count
            from d1
            group by pat_seqno, ndc_seqno
         ) as counts
    inner join (
            select pat_seqno, max(ndc_count) as ndc_count
            from (
                    select pat_seqno, ndc_seqno, count(*) as ndc_count
                    from d1
                    group by pat_seqno, ndc_seqno
                 )
            group by pat_seqno
         ) as peak
        on  counts.pat_seqno = peak.pat_seqno
        and counts.ndc_count = peak.ndc_count;
quit;
答案 1 :(得分:1)
proc sql;
/* For each patient, keep the ndc_seqno value(s) whose row count in d1 equals
   the highest per-NDC count for that patient. Ties yield one row per tied
   ndc_seqno. */
create table unique_rx_count as
select a.pat_seqno, a.ndc_seqno
from d1 a
group by a.pat_seqno, a.ndc_seqno
having count(*)=
    /* Correlated on a.pat_seqno: the maximum is taken over the current
       patient only. */
    (select max(c.ndc_count)
     from
        /* The inline view carries the alias c so the correlation predicate
           below refers to a name that is in scope at this level. The derived
           column is called ndc_count rather than count to avoid clashing with
           the count() function name. */
        /* NOTE: count(ndc_seqno) skips rows where ndc_seqno is missing,
           whereas the outer count(*) counts every row. */
        (select d.pat_seqno, d.ndc_seqno, count(d.ndc_seqno) as ndc_count
         from d1 d
         group by d.pat_seqno, d.ndc_seqno) c
     where c.pat_seqno=a.pat_seqno);
quit;
答案 2 :(得分:0)
也许是这样的:
/* Return, for each patient, the ndc_seqno value(s) that occur most often in
   d1, together with that occurrence count. Ties produce one row per tied
   ndc_seqno.
   Grouping by (pat_seqno, ndc_seqno) and taking MAX over the joined count
   would only echo every pair with its own count, so the per-pair counts are
   matched against a per-patient maximum instead. */
SELECT b.pat_seqno, b.ndc_seqno, b.ndc_count
FROM (
    /* rows per patient/NDC pair */
    SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
    FROM d1
    GROUP BY pat_seqno, ndc_seqno
) b
INNER JOIN (
    /* highest per-NDC count for each patient */
    SELECT c.pat_seqno, MAX(c.ndc_count) AS max_count
    FROM (
        SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
        FROM d1
        GROUP BY pat_seqno, ndc_seqno
    ) c
    GROUP BY c.pat_seqno
) m ON m.pat_seqno = b.pat_seqno AND m.max_count = b.ndc_count