我在 Redshift 中有这样一张表:
user_id | music | comedy | drama | t1 | t2 |
_______________________________________________
user1 | 1 | 2 | 0 | 0 |1 |
user2 | 0 | 0 | 1 | 1 |0 |
user3 | 1 | 2 | 0 | 2 |2 |
user4 | 1 | 2 | 1 | 0 |3 |
我需要使用SQL的输出:
category| topic | count category| count topic
_____________________________________________
music | t1 | 3 | 2
music | t2 | 3 | 6
comedy | t1 | 6 | 2
comedy | t2 | 6 | 6
drama | t1 | 2 | 1
drama | t2 | 2 | 3
基本上,我需要计算访问某个类别的用户还访问了某个主题的次数。
我知道如何使用python进行操作,但是我是SQL新手,所以需要您的帮助!
答案 0 :(得分:0)
您需要取消数据透视,然后重新聚合。遵循确切的逻辑有点困难,但是我认为您想要这样做:
-- Unpivot the wide per-user table into one row per (user, category, topic)
-- and re-aggregate.
--   cnt_category: total visits to the category (sum of the category column)
--   cnt_topic:    total visits to the topic by users who visited the category
--
-- Each branch filters on the category column only. Users with a zero topic
-- count must stay in the set: they add 0 to cnt_topic but still contribute
-- their category visits to cnt_category.
-- Each (category, topic) pair appears exactly once; a repeated branch would
-- double-count its rows.
with category_topic_visits as (
    select 'music' as category, 't1' as topic, music as category_cnt, t1 as topic_cnt
    from t
    where music <> 0
    union all
    select 'music', 't2', music, t2
    from t
    where music <> 0
    union all
    select 'comedy', 't1', comedy, t1
    from t
    where comedy <> 0
    union all
    select 'comedy', 't2', comedy, t2
    from t
    where comedy <> 0
    union all
    select 'drama', 't1', drama, t1
    from t
    where drama <> 0
    union all
    select 'drama', 't2', drama, t2
    from t
    where drama <> 0
)
select
    category,
    topic,
    sum(category_cnt) as cnt_category,
    sum(topic_cnt) as cnt_topic
from category_topic_visits
group by category, topic;
答案 1 :(得分:0)
我通过使用 unpivot 得到了一个解决方案,不过写法有点冗长。
解决方案1 -如果数据库中存在表
源表名称table1
-- Per-category and per-(category, topic) totals for table1.
-- NOTE(review): the CASE arms compare against upper-case names, so this
-- assumes UNPIVOT emits column names in upper case (as Oracle does) and that
-- chained UNPIVOT clauses are accepted — confirm on the target engine.
WITH table2 AS (
    SELECT
        music,
        comedy,
        drama,
        t1,
        t2,
        -- Table-wide sum of each category column, repeated on every row.
        SUM(music)  OVER () AS cnt_music,
        SUM(comedy) OVER () AS cnt_comedy,
        SUM(drama)  OVER () AS cnt_drama,
        -- Sum of a topic column over rows where both the topic and the
        -- category are non-zero (their product is positive).
        (SELECT SUM(t1) FROM table1 WHERE t1 * music  > 0) AS music_t1,
        (SELECT SUM(t1) FROM table1 WHERE t1 * comedy > 0) AS comedy_t1,
        (SELECT SUM(t1) FROM table1 WHERE t1 * drama  > 0) AS drama_t1,
        (SELECT SUM(t2) FROM table1 WHERE t2 * music  > 0) AS music_t2,
        (SELECT SUM(t2) FROM table1 WHERE t2 * comedy > 0) AS comedy_t2,
        (SELECT SUM(t2) FROM table1 WHERE t2 * drama  > 0) AS drama_t2
    FROM table1
)
SELECT
    category,
    topic,
    CASE category
        WHEN 'COMEDY' THEN cnt_comedy
        WHEN 'DRAMA'  THEN cnt_drama
        WHEN 'MUSIC'  THEN cnt_music
    END AS "count category",
    CASE topic
        WHEN 'T1' THEN
            CASE category
                WHEN 'MUSIC'  THEN music_t1
                WHEN 'COMEDY' THEN comedy_t1
                WHEN 'DRAMA'  THEN drama_t1
            END
        WHEN 'T2' THEN
            CASE category
                WHEN 'MUSIC'  THEN music_t2
                WHEN 'COMEDY' THEN comedy_t2
                WHEN 'DRAMA'  THEN drama_t2
            END
    END AS "count topic"
FROM table2
UNPIVOT (category_val FOR category IN (music, comedy, drama)) cat_rows
UNPIVOT (topic_val FOR topic IN (t1, t2)) topic_rows
-- Every row carries the same totals, so grouping collapses the rows produced
-- by the two unpivots down to one per (category, topic).
GROUP BY
    category,
    topic,
    cnt_music,
    cnt_comedy,
    cnt_drama,
    music_t1,
    comedy_t1,
    drama_t1,
    music_t2,
    comedy_t2,
    drama_t2
ORDER BY category;
OR
解决方案2 -如果数据库中不存在表
-- Same report as the table-backed variant, but with the sample rows supplied
-- inline so the query runs without an existing table.
-- NOTE(review): `dual` and the upper-case names in the CASE arms suggest an
-- Oracle-compatible engine — confirm before running elsewhere.
WITH table1 AS (
    -- Column names come from the first branch of the UNION ALL.
    SELECT 'user1' AS user_id, 1 AS music, 2 AS comedy, 0 AS drama, 0 AS t1, 1 AS t2 FROM dual
    UNION ALL
    SELECT 'user2', 0, 0, 1, 1, 0 FROM dual
    UNION ALL
    SELECT 'user3', 1, 2, 0, 2, 2 FROM dual
    UNION ALL
    SELECT 'user4', 1, 2, 1, 0, 3 FROM dual
),
table2 AS (
    SELECT
        music,
        comedy,
        drama,
        t1,
        t2,
        -- Table-wide sum of each category column, repeated on every row.
        SUM(music)  OVER () AS cnt_music,
        SUM(comedy) OVER () AS cnt_comedy,
        SUM(drama)  OVER () AS cnt_drama,
        -- Sum of a topic column over rows where both the topic and the
        -- category are non-zero (their product is positive).
        (SELECT SUM(t1) FROM table1 WHERE t1 * music  > 0) AS music_t1,
        (SELECT SUM(t1) FROM table1 WHERE t1 * comedy > 0) AS comedy_t1,
        (SELECT SUM(t1) FROM table1 WHERE t1 * drama  > 0) AS drama_t1,
        (SELECT SUM(t2) FROM table1 WHERE t2 * music  > 0) AS music_t2,
        (SELECT SUM(t2) FROM table1 WHERE t2 * comedy > 0) AS comedy_t2,
        (SELECT SUM(t2) FROM table1 WHERE t2 * drama  > 0) AS drama_t2
    FROM table1
)
SELECT
    category,
    topic,
    CASE category
        WHEN 'COMEDY' THEN cnt_comedy
        WHEN 'DRAMA'  THEN cnt_drama
        WHEN 'MUSIC'  THEN cnt_music
    END AS "count category",
    CASE topic
        WHEN 'T1' THEN
            CASE category
                WHEN 'MUSIC'  THEN music_t1
                WHEN 'COMEDY' THEN comedy_t1
                WHEN 'DRAMA'  THEN drama_t1
            END
        WHEN 'T2' THEN
            CASE category
                WHEN 'MUSIC'  THEN music_t2
                WHEN 'COMEDY' THEN comedy_t2
                WHEN 'DRAMA'  THEN drama_t2
            END
    END AS "count topic"
FROM table2
UNPIVOT (category_val FOR category IN (music, comedy, drama)) cat_rows
UNPIVOT (topic_val FOR topic IN (t1, t2)) topic_rows
-- Every row carries the same totals, so grouping collapses the rows produced
-- by the two unpivots down to one per (category, topic).
GROUP BY
    category,
    topic,
    cnt_music,
    cnt_comedy,
    cnt_drama,
    music_t1,
    comedy_t1,
    drama_t1,
    music_t2,
    comedy_t2,
    drama_t2
ORDER BY category;