我有几张表,它们的 TSTZRANGE 值彼此部分重叠。我需要 JOIN 这些表,并在每个范围的 LOWER 和 UPPER 边界处创建“断点”,把时间轴切分成互不重叠的区间。
一些示例数据:
-- Sample table "foo": a label plus the time range during which it is active.
CREATE TABLE foo (
    foo_id          SERIAL PRIMARY KEY,
    foo             TEXT,
    active_timespan TSTZRANGE
);

-- Two six-hour windows on 2015-01-01 (offset -00, i.e. UTC).
-- '[)' is the constructor's default bound form (inclusive lower, exclusive
-- upper); it is spelled out here only to make the semantics visible.
INSERT INTO foo (foo, active_timespan)
VALUES
    ('One', TSTZRANGE('2015-01-01 00:00:00-00', '2015-01-01 06:00:00-00', '[)')),
    ('Two', TSTZRANGE('2015-01-01 12:00:00-00', '2015-01-01 18:00:00-00', '[)'));
-- Sample table "bar": a label plus the time range during which it is active.
CREATE TABLE bar (
    bar_id          SERIAL PRIMARY KEY,
    bar             TEXT,
    active_timespan TSTZRANGE
);

-- Two six-hour windows on 2015-01-01 (offset -00, i.e. UTC), each starting
-- three hours after the corresponding sample row in foo.
-- '[)' is the constructor's default bound form (inclusive lower, exclusive
-- upper); it is spelled out here only to make the semantics visible.
INSERT INTO bar (bar, active_timespan)
VALUES
    ('Alpha', TSTZRANGE('2015-01-01 03:00:00-00', '2015-01-01 09:00:00-00', '[)')),
    ('Bravo', TSTZRANGE('2015-01-01 15:00:00-00', '2015-01-01 21:00:00-00', '[)'));
期望的结果:
active_timespan | foo | bar
------------------------------------------------------|------|------
'["2015-01-01 00:00:00-00","2015-01-01 03:00:00-00"]' | One | Null
'["2015-01-01 03:00:00-00","2015-01-01 06:00:00-00"]' | One | Alpha
'["2015-01-01 06:00:00-00","2015-01-01 09:00:00-00"]' | Null | Alpha
'["2015-01-01 12:00:00-00","2015-01-01 15:00:00-00"]' | Two | Null
'["2015-01-01 15:00:00-00","2015-01-01 18:00:00-00"]' | Two | Bravo
'["2015-01-01 18:00:00-00","2015-01-01 21:00:00-00"]' | Null | Bravo
目前的做法:
我目前使用一套相当繁琐的 CTE:先把每张表中需要的数据各自选入一个 CTE,然后用 UNION 合并各个 CTE 中所有不重复的 LOWER 和 UPPER 边界值,得到一个 TIMESTAMPTZ 值的列表。接着使用 LEAD 窗口函数,由相邻的断点构造出新的 TSTZRANGE 值。最后,再把最初的各个 CTE 连接(JOIN)到这份新的 TSTZRANGE 列表上。
当前的查询模式如下:
-- Cuts the time axis at every LOWER/UPPER boundary of the active_timespan
-- ranges in foo and bar (restricted to rows overlapping the window $1) and
-- reports, for each resulting slice, which foo and bar rows are active in it.
--
-- Parameters:
--   $1  TSTZRANGE window; only source rows overlapping it are considered.
-- Result columns: timespan (TSTZRANGE slice), foo, bar (NULL when no row of
-- that table is active during the slice).
--
-- NOTE(review): LOWER()/UPPER() return NULL for unbounded ranges, which would
-- produce NULL breakpoints; the sample data has none -- confirm for real data.
WITH
-- Rows of foo that overlap the requested window.
cte_foo AS (
    SELECT
        foo_id,
        foo,
        active_timespan
    FROM
        foo
    WHERE
        active_timespan && $1
)
-- Rows of bar that overlap the requested window.
, cte_bar AS (
    SELECT
        bar_id,
        bar,
        active_timespan
    FROM
        bar
    WHERE
        active_timespan && $1
)
-- continue for each table to be joined...
-- , cte_baz AS (
--     SELECT
--         baz_id,
--         baz,
--         active_timespan
--     FROM
--         baz
--     WHERE
--         active_timespan && $1
-- )
-- Every distinct range boundary across all source tables.
-- UNION already removes duplicates, so no DISTINCT is needed on the branches.
, cte_times AS (
    SELECT
        UNNEST(
            ARRAY[
                LOWER(cte_foo.active_timespan),
                UPPER(cte_foo.active_timespan)
            ]
        ) AS breakpoint
    FROM
        cte_foo
    UNION
    SELECT
        UNNEST(
            ARRAY[
                LOWER(cte_bar.active_timespan),
                UPPER(cte_bar.active_timespan)
            ]
        ) AS breakpoint
    FROM
        cte_bar
    -- continue for each table to be joined...
    -- UNION
    -- SELECT
    --     UNNEST(
    --         ARRAY[
    --             LOWER(cte_baz.active_timespan),
    --             UPPER(cte_baz.active_timespan)
    --         ]
    --     ) AS breakpoint
    -- FROM
    --     cte_baz
)
-- Consecutive breakpoints become half-open '[)' slices. The last breakpoint
-- is paired with 'infinity'; that trailing slice is discarded by the final
-- WHERE unless some source row actually overlaps it.
, cte_timespans AS (
    SELECT
        TSTZRANGE(
            cte_times.breakpoint,
            LEAD(cte_times.breakpoint, 1, 'infinity'::TIMESTAMPTZ)
                OVER (ORDER BY cte_times.breakpoint)
        ) AS timespan
    FROM
        cte_times
)
SELECT
    cte_timespans.timespan,
    cte_foo.foo,
    cte_bar.bar
FROM
    cte_timespans
    LEFT JOIN cte_foo ON cte_timespans.timespan && cte_foo.active_timespan
    LEFT JOIN cte_bar ON cte_timespans.timespan && cte_bar.active_timespan
    -- continue for each table to be joined...
    -- LEFT JOIN cte_baz ON cte_timespans.timespan && cte_baz.active_timespan
WHERE
    -- Drop slices during which nothing is active (gaps between ranges and the
    -- trailing open-ended slice). Primary keys are tested because the label
    -- columns themselves are nullable.
    cte_foo.foo_id IS NOT NULL
    OR cte_bar.bar_id IS NOT NULL
    -- OR cte_baz.baz_id IS NOT NULL
ORDER BY
    -- Fixed: the original referenced the non-existent alias cte_timelines.
    cte_timespans.timespan
性能显然很差,尤其是在连接多张表时(有些情况下超过十张)。这是解决这类问题的最佳方法吗?
一厢情愿的想法:
如果 PostgreSQL 有某种 RANGE JOIN 就太好了——大概像这样:
-- Hypothetical syntax only: the surrounding text wishes PostgreSQL had a
-- "RANGE JOIN"; this sketches the desired semantics and is not runnable SQL.
SELECT
*
FROM
foo
FULL OUTER RANGE JOIN bar ON foo.active_timespan && bar.active_timespan
答案 0 :(得分:0)
-- Cuts the time axis at every LOWER/UPPER boundary of the active_timespan
-- ranges in foo (overlapping $1) and bar (overlapping $2), then reports which
-- foo and bar rows are active during each slice.
--
-- Parameters:
--   $1  TSTZRANGE window applied to foo.
--   $2  TSTZRANGE window applied to bar.
-- Result columns: active_span (TSTZRANGE slice), foo, bar (NULL when no row
-- of that table is active during the slice).
WITH
-- Every distinct range boundary from both tables. UNION de-duplicates, so
-- the branches need no DISTINCT of their own.
eventlist AS (
    SELECT
        UNNEST(
            ARRAY[
                LOWER(active_timespan),
                UPPER(active_timespan)
            ]
        ) AS breakpoint
    FROM foo
    WHERE active_timespan && $1
    UNION
    SELECT
        UNNEST(
            ARRAY[
                LOWER(active_timespan),
                UPPER(active_timespan)
            ]
        ) AS breakpoint
    FROM bar
    WHERE active_timespan && $2
),
-- Consecutive breakpoints become half-open '[)' slices; the last breakpoint
-- is paired with 'infinity'.
durations AS (
    SELECT
        TSTZRANGE(
            breakpoint,
            LEAD(breakpoint, 1, 'infinity'::TIMESTAMPTZ)
                OVER (ORDER BY breakpoint ASC)
        ) AS active_span
    FROM eventlist
)
SELECT
    durations.active_span,
    foo.foo,
    bar.bar
FROM durations
-- The window filters live in the ON clauses. In WHERE they would evaluate to
-- NULL for unmatched rows and silently turn these outer joins into inner
-- joins, dropping every slice where only one table is active.
LEFT JOIN foo
    ON durations.active_span && foo.active_timespan
    AND foo.active_timespan && $1
LEFT JOIN bar
    ON durations.active_span && bar.active_timespan
    AND bar.active_timespan && $2
-- Keep only slices where at least one table matched; primary keys are tested
-- because the label columns are nullable.
WHERE foo.foo_id IS NOT NULL
    OR bar.bar_id IS NOT NULL
ORDER BY durations.active_span
;