在Redshift上填充日期范围

时间:2016-12-15 15:38:51

标签: sql date amazon-redshift

我有一个返回以下内容的查询。

-- Running total of ticket rows per calendar day for a single event.
-- The unqualified event_id filter is written here as t.event_id, since
-- person_tickets is the only table shown to have that column (it is the
-- join key) -- verify against the real schema.
-- NOTE: because the WHERE clause filters on the outer-joined table, rows
-- without a matching ticket are discarded and the LEFT JOIN effectively
-- behaves like an inner join.
SELECT
    TRUNC(t.created),
    SUM(COUNT(*)) OVER (
        ORDER BY TRUNC(t.created)
        ROWS UNBOUNDED PRECEDING
    ) AS cumulative_sales
FROM event AS e
LEFT JOIN person_tickets AS t
    ON e.id = t.event_id
WHERE t.event_id = 9999
GROUP BY TRUNC(t.created)

Date        cumulative_bookings
2016-02-12  1
2016-02-18  3
2016-02-19  5
2016-02-20  352
2016-02-21  352

我想填充日期序列,使结果包含范围内的所有日期。

2016-02-12  1
2016-02-13  1
2016-02-14  1
2016-02-15  1
2016-02-16  1
2016-02-17  1
2016-02-18  3
2016-02-19  5
2016-02-20  352
2016-02-21  352

我一直在尝试与下面这段能生成日期序列的代码进行连接,但我想不出应该在哪里、以何种方式优雅地完成这个连接。

-- Generates consecutive calendar dates counting backwards from the current
-- date: row 1 is today - 1, row 2 is today - 2, and so on.
-- The event table is used only as a row source, so the result has at most
-- 500 rows and never more rows than event contains.
SELECT
    CAST(
        CAST(GETDATE() AS DATE) - ROW_NUMBER() OVER (ORDER BY TRUE)
        AS DATE
    ) AS n
FROM event
LIMIT 500

下面的写法取得了一点进展,但还不完全正确:

-- Keep cumulative_bookings when it is present; otherwise fall back to the
-- LAG(... IGNORE NULLS) value taken over rows ordered by n.
COALESCE(
    cumulative_bookings,
    LAG(cumulative_bookings IGNORE NULLS) OVER (ORDER BY n)
) AS filled_cumulative_bookings

1 个答案:

答案 0 :(得分:1)

您需要在Redshift中使用数字表。(在“普通”的Postgres中,可以按照这个答案使用generate_series()。)

基本上,您需要先创建一个包含最小日期和最大日期之间所有日期的列表,然后用该列表左连接(LEFT JOIN)稀疏的日期数据。

--Build a helper "numbers" table holding the integers 0-127 (128 rows).
--Starting from the single row 0, every INSERT ... SELECT appends a shifted
--copy of the rows already present, doubling the row count each time.
DROP TABLE IF EXISTS numbers;
CREATE TABLE numbers (
    n INTEGER NOT NULL
)
DISTSTYLE ALL;
INSERT INTO numbers (n) VALUES (0);                   --   1 row : 0
INSERT INTO numbers (n) SELECT n + 1  FROM numbers;   --   2 rows: 0-1
INSERT INTO numbers (n) SELECT n + 2  FROM numbers;   --   4 rows: 0-3
INSERT INTO numbers (n) SELECT n + 4  FROM numbers;   --   8 rows: 0-7
INSERT INTO numbers (n) SELECT n + 8  FROM numbers;   --  16 rows: 0-15
INSERT INTO numbers (n) SELECT n + 16 FROM numbers;   --  32 rows: 0-31
INSERT INTO numbers (n) SELECT n + 32 FROM numbers;   --  64 rows: 0-63
INSERT INTO numbers (n) SELECT n + 64 FROM numbers;   -- 128 rows: 0-127
--To inspect the result: SELECT n FROM numbers;

--Sample fixture with sparse dates: two rows, six days apart, each with value 1.
DROP TABLE IF EXISTS two_dates;
CREATE TABLE two_dates (
    dtm   DATETIME NOT NULL,
    value INT      NOT NULL
)
DISTSTYLE ALL;
INSERT INTO two_dates (dtm, value)
VALUES
    ('2016-12-01', 1),
    ('2016-12-07', 1);
--To inspect the result: SELECT dtm FROM two_dates;

--Fill the gaps in sparse per-day data and compute a running total.
--The raw timestamps are bucketed to whole days before anything else, so
--rows that carry a time-of-day component, or several rows falling on the
--same day, still line up with the generated calendar. Joining on the raw
--timestamp would silently drop such rows from the result and from cume.
--NOTE: the generated calendar can never be longer than the number of rows
--in the numbers table; a wider date range is cut off at that length.

--First CTE collapses the sparse data to one row per calendar day
WITH cte_daily
AS (SELECT DATE_TRUNC('day', two_dates.dtm) AS day_dtm
          ,SUM(two_dates.value)             AS day_value
    FROM two_dates
    GROUP BY DATE_TRUNC('day', two_dates.dtm))
--Second CTE finds Min, Max, and Days in range - 1 row
, cte_range
AS (SELECT MIN(day_dtm) AS min_dtm
          ,MAX(day_dtm) AS max_dtm
          ,DATEDIFF(day, MIN(day_dtm), MAX(day_dtm)) AS dtm_range
    FROM cte_daily)
--Third CTE creates list of dates between Min and Max dates (both inclusive)
, cte_dtm_list
AS (SELECT  DATEADD(day, numbers.n, cte_range.min_dtm) AS dtm
    FROM       cte_range
    CROSS JOIN numbers
    WHERE numbers.n <= cte_range.dtm_range)
--Finally we left join to the per-day data; days without data count as 0
SELECT  cte_dtm_list.dtm
       ,COALESCE(cte_daily.day_value, 0) AS value
       ,SUM(COALESCE(cte_daily.day_value, 0)) OVER (ORDER BY cte_dtm_list.dtm ROWS UNBOUNDED PRECEDING) AS cume
FROM         cte_dtm_list
LEFT JOIN    cte_daily
    ON  cte_dtm_list.dtm = cte_daily.day_dtm
ORDER BY cte_dtm_list.dtm
;
--Expected output for the sample data:
--           dtm         | value | cume 
--  ---------------------+-------+------
--   2016-12-01 00:00:00 |     1 |    1
--   2016-12-02 00:00:00 |     0 |    1
--   2016-12-03 00:00:00 |     0 |    1
--   2016-12-04 00:00:00 |     0 |    1
--   2016-12-05 00:00:00 |     0 |    1
--   2016-12-06 00:00:00 |     0 |    1
--   2016-12-07 00:00:00 |     1 |    2