返回每组中最常见的值

时间:2013-11-26 14:26:28

标签: sql sas proc-sql

我想创建一张表,显示每位患者(pat_seqno)最常见的 ndc 编号(ndc_seqno)。到目前为止,我的 having 子句一直不起作用;下面是我最近的一次尝试,它报错说在参与查询的表中找不到 ndc_count 列。谢谢你的帮助。

/* Question's attempt (does not run): meant to keep, for each pat_seqno, the
   most frequent ndc_seqno in the table unique_rx_count. */
proc sql;
create table unique_rx_count as
/* ndc_count is only an alias inside the subquery below; it is not a column
   visible to this outer select, which matches the reported error that
   ndc_count cannot be found in the contributing tables. */
select pat_seqno , ndc_seqno, ndc_count
from d1
/* NOTE(review): this filter compares ndc_seqno values against per-patient
   counts returned by the subquery, which looks unintended. */
where ndc_seqno in 
    (select count(ndc_seqno) as ndc_count
    from d1
    group by pat_seqno)
group by pat_seqno
having ndc_count =  max(ndc_count)
   ;
quit;

例如:

Pat_seqno  ndc_seqno
2          45
2          45
2          23
2          45
16         10
16         10
16         78

预期返回:

Pat_seqno  ndc_seqno
2          45
16         10

3 个答案:

答案 0 :(得分:1)

这是一个简单粗暴的解决方案,但它确实有效。我先把它拆成三条独立的 SQL 语句:

/* Step 1: number of rows in d1 for each (pat_seqno, ndc_seqno) pair. */
proc sql;
    create table step1 as
        select
            src.pat_seqno,
            src.ndc_seqno,
            count(*) as ndc_count
        from d1 as src
        group by
            src.pat_seqno,
            src.ndc_seqno;
quit;

/* Step 2: highest ndc_count recorded in step1 for each pat_seqno. */
proc sql;
    create table step2 as
        select
            cnt.pat_seqno,
            max(cnt.ndc_count) as ndc_count
        from step1 as cnt
        group by cnt.pat_seqno;
quit;

/* Step 3: keep the step1 rows whose count equals the patient's maximum from
   step2. Pairs tied for the maximum all satisfy the join, so each is kept. */
proc sql;
    create table want as
        select
            cnt.pat_seqno,
            cnt.ndc_seqno,
            cnt.ndc_count
        from step1 as cnt
        inner join step2 as mx
            on  cnt.pat_seqno = mx.pat_seqno
            and cnt.ndc_count = mx.ndc_count;
quit;

如果您愿意,也可以将这三步合并成一条 SQL 语句:

/* Single-statement form: per-(pat_seqno, ndc_seqno) row counts from d1 are
   joined to the per-patient maximum of those counts, so only the pair(s)
   holding the maximum count for each patient are written to want. */
proc sql;
    create table want as
        select
            pair.pat_seqno,
            pair.ndc_seqno,
            pair.ndc_count
        from
            /* rows per (pat_seqno, ndc_seqno) */
            (
                select pat_seqno, ndc_seqno, count(*) as ndc_count
                from d1
                group by pat_seqno, ndc_seqno
            ) as pair
        inner join
            /* largest per-pair count for each pat_seqno */
            (
                select
                    per_pair.pat_seqno,
                    max(per_pair.ndc_count) as ndc_count
                from
                    (
                        select pat_seqno, ndc_seqno, count(*) as ndc_count
                        from d1
                        group by pat_seqno, ndc_seqno
                    ) as per_pair
                group by per_pair.pat_seqno
            ) as peak
            on  pair.pat_seqno = peak.pat_seqno
            and pair.ndc_count = peak.ndc_count;
quit;

答案 1 :(得分:1)

/* For each pat_seqno, keep the ndc_seqno value(s) whose row count equals the
   highest row count of any ndc_seqno for that patient. Values tied for the
   highest count are all returned. */
proc sql;
create table unique_rx_count as
select a.pat_seqno, a.ndc_seqno
from d1 a
group by a.pat_seqno, a.ndc_seqno
having count(*) =
        /* correlated subquery: the largest per-ndc count for this patient */
        (select max(c.ndc_count)
        from
           /* count(*) mirrors the count(*) in the having clause, so both sides
              count rows the same way even when ndc_seqno is missing */
           (select pat_seqno, ndc_seqno, count(*) as ndc_count
           from d1
           group by pat_seqno, ndc_seqno) as c
        /* c is the inline view's own alias, so it is in scope in this where */
        where c.pat_seqno = a.pat_seqno);
quit;

答案 2 :(得分:0)

也许是这样的:

/* Most common ndc_seqno per pat_seqno.
   b: row count for each (pat_seqno, ndc_seqno) pair.
   m: the largest of those counts for each pat_seqno.
   Joining b to m on patient and count keeps only the pair(s) that reach the
   patient's maximum; pairs tied for the maximum are all returned. */
SELECT b.pat_seqno, b.ndc_seqno, b.ndc_count
FROM (
  SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
  FROM d1
  GROUP BY pat_seqno, ndc_seqno
  ) b
INNER JOIN (
  SELECT c.pat_seqno, MAX(c.ndc_count) AS max_count
  FROM (
    SELECT pat_seqno, ndc_seqno, COUNT(*) AS ndc_count
    FROM d1
    GROUP BY pat_seqno, ndc_seqno
    ) c
  GROUP BY c.pat_seqno
  ) m ON m.pat_seqno = b.pat_seqno AND m.max_count = b.ndc_count