我正在寻找一个查询来进行以下转换。
基本上我想在每个网站每天的基础上找到前3个频繁的SELL_COUNTRY和前3个频繁类别。 (例如,网站1,日期为6-5-2017,SELL_COUNTRY有2 * US,1 * JP和1 * UK,因此TOP1_SELL_COUNTRY为US,JP和UK为TOP2_SELL_COUNTRY和TOP3_SELL_COUNTRY。同样的想法为CATEGORY列)
我当前的解决方案涉及许多子查询,这有效,但我觉得它太复杂了。我对sql master如何以优雅的方式做到这一点感兴趣。
目前我知道如何使用
答案 0 :(得分:1)
我会分三步完成:
CASE
的结果只是您的值和许多NULL
值,因此min()
输出价值)像这样:
WITH
countries as (
SELECT *, row_number() over (partition by website,date order by count desc)
FROM (
SELECT
website
,date::date
,sell_country
,count(1)
FROM your_table
GROUP BY 1,2,3
)
)
,categories as (
SELECT *, row_number() over (partition by website,date order by count desc)
FROM (
SELECT
website
,date::date
,category
,count(1)
FROM your_table
GROUP BY 1,2,3
)
)
SELECT
website
,date
,coalesce(min(case when t1.row_number=1 then t1.sell_country end),'NA') as top1_sell_country
,coalesce(min(case when t1.row_number=2 then t1.sell_country end),'NA') as top2_sell_country
,coalesce(min(case when t1.row_number=3 then t1.sell_country end),'NA') as top3_sell_country
,coalesce(min(case when t2.row_number=1 then t2.category end),'NA') as top1_sell_category
,coalesce(min(case when t2.row_number=2 then t2.category end),'NA') as top2_sell_category
,coalesce(min(case when t2.row_number=3 then t2.category end),'NA') as top3_sell_category
FROM countries t1
FULL JOIN categories t2
USING (website,date)
GROUP BY 1,2
ORDER BY 1,2
答案 1 :(得分:0)
WITH a1 AS
(
SELECT *,
COUNT(*) OVER( PARTITION BY website,SUBSTRING(visit_date,1,8),sell_country ) AS sell_cntry,
COUNT(*) OVER( PARTITION BY website,SUBSTRING(visit_date,1,8),pur_country ) AS pur_cntry
FROM Yourtable
),
a2 AS
(
SELECT website,
visit_date,
sell_country,
RANK() OVER ( PARTITION BY website,SUBSTRING(visit_date,1,8) ORDER BY sell_cntry DESC ) AS sell_cntry_rnk
FROM a1
),
a3 AS
(
SELECT website,
visit_date,
pur_country,
RANK() OVER ( PARTITION BY website,SUBSTRING(visit_date,1,8) ORDER BY pur_cntry DESC ) AS pur_cntry_rnk
FROM a1
),
a4 AS
(
SELECT a2.website AS company,
a2.v_date,
CASE WHEN a2.sell_cntry_rn = 1 THEN a2.sell_country END AS TOP1_SELL_COUNTRY,
CASE WHEN a2.sell_cntry_rn = 2 THEN a2.sell_country END AS TOP2_SELL_COUNTRY,
CASE WHEN a2.sell_cntry_rn = 3 THEN a2.sell_country END AS TOP3_SELL_COUNTRY,
CASE WHEN a3.pur_cntry_rn = 1 THEN a3.pur_country END AS TOP1_PUR_COUNTRY,
CASE WHEN a3.pur_cntry_rn = 2 THEN a3.pur_country END AS TOP2_PUR_COUNTRY,
CASE WHEN a3.pur_cntry_rn = 3 THEN a3.pur_country END AS TOP3_PUR_COUNTRY
FROM (
SELECT Z.*,
ROW_NUMBER() OVER( PARTITION BY website,v_date ORDER BY sell_cntry_rnk,sell_country ) AS sell_cntry_rn
FROM
(
SELECT DISTINCT website,
SUBSTRING(visit_date,1,8) AS v_date,
sell_cntry_rnk,
sell_country
FROM a2
) Z
WHERE Z.sell_cntry_rnk <= 3
) a2
INNER JOIN
(
SELECT *,
ROW_NUMBER() OVER( PARTITION BY website,v_date ORDER BY pur_cntry_rnk,pur_country ) AS pur_cntry_rn
FROM
( SELECT DISTINCT website,
SUBSTRING(visit_date,1,8) AS v_date,
pur_cntry_rnk,
pur_country
FROM a3
) Z
WHERE Z.pur_cntry_rnk <= 3
) a3
ON a2.website = a3.website
AND a2.v_date = a3.v_date
),
a5 AS
(
SELECT company,
v_date,
MAX(TOP1_SELL_COUNTRY) AS TOP1_SELL_COUNTRY,
MAX(TOP2_SELL_COUNTRY) AS TOP2_SELL_COUNTRY,
MAX(TOP3_SELL_COUNTRY) AS TOP3_SELL_COUNTRY,
MAX(TOP1_PUR_COUNTRY) AS TOP1_PUR_COUNTRY,
MAX(TOP2_PUR_COUNTRY) AS TOP2_PUR_COUNTRY,
MAX(TOP3_PUR_COUNTRY) AS TOP3_PUR_COUNTRY
FROM a4
GROUP BY company,
v_date
)
SELECT company,
v_date,
CASE WHEN TOP1_SELL_COUNTRY IS NULL THEN 'NA' ELSE TOP1_SELL_COUNTRY END AS TOP1_SELL_COUNTRY,
CASE WHEN TOP2_SELL_COUNTRY IS NULL THEN 'NA' ELSE TOP2_SELL_COUNTRY END AS TOP2_SELL_COUNTRY,
CASE WHEN TOP3_SELL_COUNTRY IS NULL THEN 'NA' ELSE TOP3_SELL_COUNTRY END AS TOP3_SELL_COUNTRY,
CASE WHEN TOP1_PUR_COUNTRY IS NULL THEN 'NA' ELSE TOP1_PUR_COUNTRY END AS TOP1_PUR_COUNTRY,
CASE WHEN TOP2_PUR_COUNTRY IS NULL THEN 'NA' ELSE TOP2_PUR_COUNTRY END AS TOP2_PUR_COUNTRY,
CASE WHEN TOP3_PUR_COUNTRY IS NULL THEN 'NA' ELSE TOP3_PUR_COUNTRY END AS TOP3_PUR_COUNTRY
FROM a5
ORDER BY company,v_date;