SQL Redshift:转置并按多列分组

时间:2019-08-02 09:09:18

标签: sql amazon-redshift

我在Redshift中有这张表:

user_id | music | comedy | drama | t1   | t2  |
_______________________________________________

user1   | 1     | 2      | 0     | 0    |1    |
user2   | 0     | 0      | 1     | 1    |0    |
user3   | 1     | 2      | 0     | 2    |2    |
user4   | 1     | 2      | 1     | 0    |3    |

我需要用SQL得到如下输出:

category| topic | count category| count topic
_____________________________________________

music   | t1    | 3             | 2      
music   | t2    | 3             | 6      
comedy  | t1    | 6             | 2      
comedy  | t2    | 6             | 6      
drama   | t1    | 2             | 1      
drama   | t2    | 2             | 3 

基本上,我需要计算访问某个类别的用户还访问了某个主题的次数。

我知道如何使用python进行操作,但是我是SQL新手,所以需要您的帮助!

2 个答案:

答案 0 :(得分:0)

您需要先对数据进行逆透视(unpivot),然后重新聚合。确切的逻辑有点难以把握,但我认为您想要的是这样:

-- Unpivot every (category, topic) pair into one row per source row of t,
-- then re-aggregate per pair.
--   cnt_category: total of the category column over all rows of t
--   cnt_topic:    total of the topic column over rows whose category value
--                 is non-zero
-- No WHERE filter is applied while unpivoting: cnt_category needs every row,
-- and rows with a zero category are excluded from cnt_topic by the CASE.
select category,
       topic,
       sum(category_cnt) as cnt_category,
       sum(case when category_cnt <> 0 then topic_cnt else 0 end) as cnt_topic
from ((select 'music' as category, 't1' as topic,
              music as category_cnt, t1 as topic_cnt
       from t
      ) union all
      (select 'music', 't2', music, t2
       from t
      ) union all
      (select 'comedy', 't1', comedy, t1
       from t
      ) union all
      (select 'comedy', 't2', comedy, t2
       from t
      ) union all
      (select 'drama', 't1', drama, t1
       from t
      ) union all
      (select 'drama', 't2', drama, t2
       from t
      )
     ) unpivoted
group by category, topic;

答案 1 :(得分:0)

我已经设法通过使用unpivot来获得解决方案,但是它变得有点冗长。

解决方案1 - 如果数据库中存在表

源表名称table1

-- table2 carries, on every row of table1, the per-category totals and the
-- per-(category, topic) totals as constants. The two unpivots then generate
-- every (category, topic) label pair, the CASE expressions pick the constant
-- that matches the pair, and GROUP BY collapses the repeated rows.
-- NOTE(review): the comparisons below expect upper-case labels ('MUSIC', 'T1')
-- from unpivot; confirm the target engine produces them in that case.
with table2 as (
    select
        music,
        comedy,
        drama,
        t1,
        t2,
        -- Whole-table totals of each category column.
        sum(music)  over () as cnt_music,
        sum(comedy) over () as cnt_comedy,
        sum(drama)  over () as cnt_drama,
        -- Topic totals over rows where topic * category is positive.
        (select sum(t1) from table1 where (t1 * music)  > 0) as music_t1,
        (select sum(t1) from table1 where (t1 * comedy) > 0) as comedy_t1,
        (select sum(t1) from table1 where (t1 * drama)  > 0) as drama_t1,
        (select sum(t2) from table1 where (t2 * music)  > 0) as music_t2,
        (select sum(t2) from table1 where (t2 * comedy) > 0) as comedy_t2,
        (select sum(t2) from table1 where (t2 * drama)  > 0) as drama_t2
    from table1
)
select
    category,
    topic,
    case category
        when 'MUSIC'  then cnt_music
        when 'COMEDY' then cnt_comedy
        when 'DRAMA'  then cnt_drama
    end as "count category",
    case topic
        when 'T1' then
            case category
                when 'MUSIC'  then music_t1
                when 'COMEDY' then comedy_t1
                when 'DRAMA'  then drama_t1
            end
        when 'T2' then
            case category
                when 'MUSIC'  then music_t2
                when 'COMEDY' then comedy_t2
                when 'DRAMA'  then drama_t2
            end
    end as "count topic"
from table2
unpivot (category_value for category in (music, comedy, drama)) by_category
unpivot (topic_value for topic in (t1, t2)) by_topic
group by
    category,
    topic,
    cnt_music,
    cnt_comedy,
    cnt_drama,
    music_t1,
    comedy_t1,
    drama_t1,
    music_t2,
    comedy_t2,
    drama_t2
order by category;

或者

解决方案2 -如果数据库中不存在表

-- Self-contained variant: table1 is built inline from the sample rows, so no
-- physical table is required.
-- table2 carries, on every row of table1, the per-category totals and the
-- per-(category, topic) totals as constants. The two unpivots then generate
-- every (category, topic) label pair, the CASE expressions pick the constant
-- that matches the pair, and GROUP BY collapses the repeated rows.
-- NOTE(review): "from dual" and the upper-case labels ('MUSIC', 'T1') expected
-- from unpivot look Oracle-specific; confirm against the target engine.
with table1 as (
    select 'user1' as user_id, 1 as music, 2 as comedy, 0 as drama, 0 as t1, 1 as t2 from dual
    union all
    select 'user2' as user_id, 0 as music, 0 as comedy, 1 as drama, 1 as t1, 0 as t2 from dual
    union all
    select 'user3' as user_id, 1 as music, 2 as comedy, 0 as drama, 2 as t1, 2 as t2 from dual
    union all
    select 'user4' as user_id, 1 as music, 2 as comedy, 1 as drama, 0 as t1, 3 as t2 from dual
),
table2 as (
    select
        music,
        comedy,
        drama,
        t1,
        t2,
        -- Whole-table totals of each category column.
        sum(music)  over () as cnt_music,
        sum(comedy) over () as cnt_comedy,
        sum(drama)  over () as cnt_drama,
        -- Topic totals over rows where topic * category is positive.
        (select sum(t1) from table1 where (t1 * music)  > 0) as music_t1,
        (select sum(t1) from table1 where (t1 * comedy) > 0) as comedy_t1,
        (select sum(t1) from table1 where (t1 * drama)  > 0) as drama_t1,
        (select sum(t2) from table1 where (t2 * music)  > 0) as music_t2,
        (select sum(t2) from table1 where (t2 * comedy) > 0) as comedy_t2,
        (select sum(t2) from table1 where (t2 * drama)  > 0) as drama_t2
    from table1
)
select
    category,
    topic,
    case category
        when 'MUSIC'  then cnt_music
        when 'COMEDY' then cnt_comedy
        when 'DRAMA'  then cnt_drama
    end as "count category",
    case topic
        when 'T1' then
            case category
                when 'MUSIC'  then music_t1
                when 'COMEDY' then comedy_t1
                when 'DRAMA'  then drama_t1
            end
        when 'T2' then
            case category
                when 'MUSIC'  then music_t2
                when 'COMEDY' then comedy_t2
                when 'DRAMA'  then drama_t2
            end
    end as "count topic"
from table2
unpivot (category_value for category in (music, comedy, drama)) by_category
unpivot (topic_value for topic in (t1, t2)) by_topic
group by
    category,
    topic,
    cnt_music,
    cnt_comedy,
    cnt_drama,
    music_t1,
    comedy_t1,
    drama_t1,
    music_t2,
    comedy_t2,
    drama_t2
order by category;