表ProductOrder列包括:
id shopid starttime endtime
1 123 2018-04-27 2018-04-28
2 234 2018-04-23 2018-04-30
3 189 2018-05-01 2018-05-30
4 321 2018-05-01 2018-05-29
我想查询两个日期之间的有效店铺数量,并按最近一个月的每一天分别统计。有效店铺是指满足 starttime <= $curDate <= endtime 的店铺,其中 curDate 是一个变量,依次取最近一个月中的每一天。
今天是2018-04-27,所以查询结果应为:
day count
2018-04-27 2
2018-04-26 1
2018-04-25 1
2018-04-24 1
2018-04-23 1
2018-04-22 0
2018-04-21 0
……………………………………
2018-03-26 0
我在MYSQL中实现了这个要求。这个SQL在MYSQL中可以很好地工作。如何转换为Hive Sql?
-- MySQL: number of distinct shops active on each of the last 30 days (today included).
-- A shop counts for a day when starttime <= day <= endtime on a row with status = 2.
-- Days without any matching row are kept by the LEFT JOIN and report a count of 0.
SELECT
    DATE_SUB(CURRENT_DATE, INTERVAL days_ago.days DAY) AS day,
    COUNT(DISTINCT ProductOrder.shopID) AS count
FROM (
    -- Offsets 0..29 = "days before today". The literals are already unique, so
    -- UNION ALL skips the de-duplication pass that a plain UNION performs.
    SELECT 0 AS days UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
    SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL
    SELECT 10 UNION ALL SELECT 11 UNION ALL SELECT 12 UNION ALL SELECT 13 UNION ALL SELECT 14 UNION ALL
    SELECT 15 UNION ALL SELECT 16 UNION ALL SELECT 17 UNION ALL SELECT 18 UNION ALL SELECT 19 UNION ALL
    SELECT 20 UNION ALL SELECT 21 UNION ALL SELECT 22 UNION ALL SELECT 23 UNION ALL SELECT 24 UNION ALL
    SELECT 25 UNION ALL SELECT 26 UNION ALL SELECT 27 UNION ALL SELECT 28 UNION ALL SELECT 29
) AS days_ago
LEFT JOIN ProductOrder
    -- All order-side conditions stay in ON (not WHERE) so unmatched days survive the outer join.
    ON ProductOrder.starttime <= DATE_SUB(CURRENT_DATE, INTERVAL days_ago.days DAY)
    AND ProductOrder.endtime >= DATE_SUB(CURRENT_DATE, INTERVAL days_ago.days DAY)
    AND ProductOrder.status = 2
GROUP BY days_ago.days
-- Smallest offset first = most recent day first; GROUP BY alone does not guarantee an order.
ORDER BY days_ago.days;
答案 0 :(得分:1)
Hive 不支持非等值连接条件,但可以把这些条件放到 WHERE 子句中。另外,可以使用 STACK 代替大量的 UNION 子查询。
-- Hive: number of distinct shops active on each of the last 30 days (today included).
-- Every day offset is paired with the status = 2 orders, and the date-range test is
-- applied inside the aggregate instead of in WHERE. A WHERE filter would discard every
-- joined row of a day on which no shop is active, so that day would disappear from the
-- result instead of being reported with a count of 0.
select DATE_SUB(CURRENT_DATE, days_ago.days) as `day`,
       COUNT(DISTINCT CASE
                 WHEN DATE_SUB(CURRENT_DATE, days_ago.days) <= po.endtime
                  AND DATE_SUB(CURRENT_DATE, days_ago.days) >= po.starttime
                 THEN po.shopID   -- non-matching rows yield NULL, which COUNT ignores
             END) as `count`
  from
       (
        select stack(30, --the number of elements
                      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
                     10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                     20, 21, 22, 23, 24, 25, 26, 27, 28, 29) as (days)
       ) days_ago
       -- LEFT JOIN keeps each day even when there is no status = 2 order at all.
       LEFT JOIN ProductOrder po ON po.status = 2
 GROUP BY DATE_SUB(CURRENT_DATE, days_ago.days)
 -- Most recent day first; GROUP BY alone does not guarantee an output order.
 ORDER BY `day` DESC;
答案 1 :(得分:0)
-- Hive: number of distinct shops active on each of the last 30 days (today included).
-- The date-range and status tests sit in the LEFT JOIN's ON clause, so a day with no
-- matching order still produces one row and is reported with a count of 0.
-- NOTE(review): non-equality conditions in ON require Hive 2.2.0 or later — confirm the target version.
SELECT DATE_SUB(CURRENT_DATE, days_ago.days) AS `day`,   -- explicit alias; otherwise Hive names it _c0
       COUNT(DISTINCT po.shopID) AS `count`
FROM
(
    -- Offsets 0..29 = "days before today", one row per offset.
    SELECT explode(array(
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) AS days
) days_ago
LEFT JOIN ProductOrder po ON
(
    DATE_SUB(CURRENT_DATE, days_ago.days) <= po.endtime
    AND DATE_SUB(CURRENT_DATE, days_ago.days) >= po.starttime
    AND po.status = 2
)
GROUP BY days_ago.days
-- Most recent day first; GROUP BY alone does not guarantee an output order.
ORDER BY `day` DESC;