Question

我想创建一个表，显示每位患者（pat_seqno）最常见的 NDC 编号（ndc_seqno）。到目前为止，我的 HAVING 子句一直不起作用；下面是我最近的一次尝试，它报错说在参与查询的表（contributing tables）中找不到列 ndc_count。谢谢你的帮助。

/* Attempt from the question, kept exactly as posted. It is reported to fail
   with an error that ndc_count is not found in the contributing tables. */
proc sql;
create table unique_rx_count as
/* ndc_count is only an alias inside the subquery further down; it is not a
   column of d1, yet the outer SELECT list and the HAVING clause refer to it. */
select pat_seqno , ndc_seqno, ndc_count
from d1
/* The subquery yields one count per pat_seqno, so this IN compares an
   ndc_seqno value against row counts rather than against NDC numbers. */
where ndc_seqno in 
    (select count(ndc_seqno) as ndc_count
    from d1
    group by pat_seqno)
group by pat_seqno
having ndc_count =  max(ndc_count)
   ;
quit;

例如，输入数据：

Pat_seqno  ndc_seqno
2          45
2          45
2          23
2          45
16         10
16         10
16         78

预期返回：

Pat_seqno  ndc_seqno
2          45
16         10

Answer 1

这是一个简单粗暴的解决方案，但确实有效。我先把它拆成三个独立的 SQL 语句：

/* Step 1: number of d1 rows for every patient / NDC combination. */
proc sql;
    create table step1 as
        select
            rx.pat_seqno,
            rx.ndc_seqno,
            count(*) as ndc_count
        from d1 as rx
        group by
            rx.pat_seqno,
            rx.ndc_seqno;
quit;

/* Step 2: the largest ndc_count found in step1 for each patient. */
proc sql;
    create table step2 as
        select
            cnt.pat_seqno,
            max(cnt.ndc_count) as ndc_count
        from step1 as cnt
        group by cnt.pat_seqno;
quit;

/* Step 3: keep the step1 rows whose count equals that patient's maximum in
   step2. A patient whose top count is shared by several NDCs keeps every
   tied row. */
proc sql;
    create table want as
        select
            per_ndc.pat_seqno,
            per_ndc.ndc_seqno,
            per_ndc.ndc_count
        from step1 as per_ndc
        inner join step2 as per_pat
            on  per_ndc.pat_seqno = per_pat.pat_seqno
            and per_ndc.ndc_count = per_pat.ndc_count;
quit;

如果您愿意，也可以把它们合并成一条 SQL 语句：

/* Single-statement version: the per-patient/NDC counts are joined to the
   per-patient maximum of those same counts, so only the rows carrying the
   maximum count survive. */
proc sql;
    create table want as
        select
            per_ndc.pat_seqno,
            per_ndc.ndc_seqno,
            per_ndc.ndc_count
        from (
            /* rows per patient / NDC pair */
            select pat_seqno, ndc_seqno, count(*) as ndc_count
            from d1
            group by pat_seqno, ndc_seqno
        ) as per_ndc
        inner join (
            /* largest of those counts per patient */
            select counted.pat_seqno, max(counted.ndc_count) as ndc_count
            from (
                select pat_seqno, ndc_seqno, count(*) as ndc_count
                from d1
                group by pat_seqno, ndc_seqno
            ) as counted
            group by counted.pat_seqno
        ) as per_pat
            on  per_ndc.pat_seqno = per_pat.pat_seqno
            and per_ndc.ndc_count = per_pat.ndc_count;
quit;

Answer 2

/* For every patient, keep the NDC(s) whose row count equals the highest
   per-NDC row count of that same patient. Tied NDCs are all returned. */
proc sql;
    create table unique_rx_count as
        select
            a.pat_seqno,
            a.ndc_seqno
        from d1 as a
        group by
            a.pat_seqno,
            a.ndc_seqno
        having count(*) = (
            /* Correlated on the patient of the outer group. The inline view
               carries its own alias (c) because an alias declared inside it
               is not visible to this WHERE clause. */
            select max(c.ndc_count)
            from (
                select
                    pat_seqno,
                    ndc_seqno,
                    count(*) as ndc_count  /* count(*) to match the outer HAVING */
                from d1
                group by
                    pat_seqno,
                    ndc_seqno
            ) as c
            where c.pat_seqno = a.pat_seqno
        );
quit;

Answer 3

也许是这样的：

/* Most frequent ndc_seqno per pat_seqno. Joining the per-pair counts to the
   per-patient maximum keeps only the top NDC(s); grouping by the pair alone
   would return every pair with its own count. Ties return all tied rows. */
SELECT a.pat_seqno, a.ndc_seqno, a.ndc_count
FROM (
  /* rows per patient / NDC pair */
  SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
  FROM d1
  GROUP BY pat_seqno, ndc_seqno
  ) a
INNER JOIN (
  /* largest of those counts per patient */
  SELECT c.pat_seqno, MAX(c.ndc_count) AS max_count
  FROM (
    SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
    FROM d1
    GROUP BY pat_seqno, ndc_seqno
    ) c
  GROUP BY c.pat_seqno
  ) b ON b.pat_seqno = a.pat_seqno AND b.max_count = a.ndc_count;

返回每组中最常见的值

3 个答案: