鉴于下表:
-- Readings table from the question: one row per (ts, itemId) that was recorded.
-- No key or uniqueness constraint is declared; only ts is mandatory.
CREATE TABLE channel1m (
    ts     TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    itemId BIGINT,
    value  BIGINT
);
其中每分钟、每个 itemId 可能插入一行,如下所示:
ts itemId value
2012-12-03 15:29:00 100 1
2012-12-03 15:30:00 100 2
2012-12-03 15:30:00 101 0
2012-12-03 15:32:00 100 1
2012-12-03 15:32:00 101 1
我找不到一种方法(在不创建额外表的前提下)来编写这样的查询:用 NULL 值填补缺失的时间点(例如 itemId 101 的 15:29:00,以及两个 itemId 的 15:31:00)。
预期的结果集为:
ts itemId value
2012-12-03 15:29:00 100 1
2012-12-03 15:29:00 101 NULL
2012-12-03 15:30:00 100 2
2012-12-03 15:30:00 101 0
2012-12-03 15:31:00 100 NULL
2012-12-03 15:31:00 101 NULL
2012-12-03 15:32:00 100 1
2012-12-03 15:32:00 101 1
我找到的解决方案都需要一张包含完整时间戳序列的单独时间表,但我更希望仅在查询本身中解决这个问题。这可能吗?
答案 0 :(得分:7)
-- Scratch setup: rebuild an empty tmp.channel1m on every run.
-- IF EXISTS lets the script run on a database where schema tmp has never been
-- created; a bare DROP SCHEMA would raise an error there and abort the script.
DROP SCHEMA IF EXISTS tmp CASCADE;
CREATE SCHEMA tmp;
SET search_path = tmp;
-- Redundant right after re-creating the schema; kept as a harmless guard.
DROP TABLE IF EXISTS channel1m CASCADE;
CREATE TABLE channel1m (
    zts     TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    zitemid BIGINT,
    zvalue  BIGINT
);
-- Sample data: a row may exist for each minute and each zitemid.
-- Two gaps are left on purpose: no row at 15:31 for either item,
-- and no row at 15:29 for item 101.
INSERT INTO channel1m (zts, zitemid, zvalue)
VALUES
    ('2012-12-03 15:29:00', 100, 1),
    ('2012-12-03 15:30:00', 100, 2),
    ('2012-12-03 15:30:00', 101, 0),
    ('2012-12-03 15:32:00', 100, 1),
    ('2012-12-03 15:32:00', 101, 1);
-- Gap-filling query: return one row for every (minute, item) pair inside the
-- observed time range; zvalue is NULL where channel1m holds no matching row.
WITH bounds AS (
    -- Earliest and latest timestamp present in the data
    SELECT
        MIN(zts) AS first_stamp,
        MAX(zts) AS last_stamp
    FROM channel1m
),
cal AS (
    -- One row per minute between the bounds, both ends included
    SELECT generate_series(bounds.first_stamp, bounds.last_stamp, '1 min'::interval) AS stamp
    FROM bounds
),
ite AS (
    -- Every item id that occurs at least once
    SELECT DISTINCT zitemid
    FROM channel1m
)
SELECT
    cal.stamp,
    ite.zitemid,
    tab.zvalue
FROM cal
CROSS JOIN ite  -- cartesian product of the {time, id} domains
LEFT JOIN channel1m AS tab
    ON tab.zts = cal.stamp
    AND tab.zitemid = ite.zitemid
-- zitemid is the tie-breaker: ordering by stamp alone leaves the order of the
-- rows that share a minute unspecified.
ORDER BY cal.stamp ASC, ite.zitemid ASC
;
输出:
NOTICE: drop cascades to table tmp.channel1m
DROP SCHEMA
CREATE SCHEMA
SET
NOTICE: table "channel1m" does not exist, skipping
DROP TABLE
CREATE TABLE
INSERT 0 5
stamp | zitemid | zvalue
---------------------+---------+--------
2012-12-03 15:29:00 | 101 |
2012-12-03 15:29:00 | 100 | 1
2012-12-03 15:30:00 | 100 | 2
2012-12-03 15:30:00 | 101 | 0
2012-12-03 15:31:00 | 100 |
2012-12-03 15:31:00 | 101 |
2012-12-03 15:32:00 | 100 | 1
2012-12-03 15:32:00 | 101 | 1
(8 rows)
答案 1 :(得分:5)
您将需要:一张包含所有 itemId 的表,以及一张包含所有所需时间点的(伪)表。
您可能已经有一张包含所有不同 itemId 的表,我们称之为 item_table。
包含所有时间点的伪表可以通过 generate_series('start_date','end_date', interval '1 minute') 获得。详情请参见 PostgreSQL 文档中关于 generate_series 的说明。
查询:
-- Pair every item with every minute of the range, then look up the stored value;
-- pairs that have no row in channel1m come back with value NULL.
-- 'start_date' and 'end_date' are placeholders for the real range bounds.
SELECT
    gs.ts,
    it.itemId,
    ch1m.value
FROM item_table AS it
-- The bounds are cast to timestamp so the series has the same type as
-- channel1m.ts; with untyped literals generate_series resolves to its
-- timestamptz variant and the join would compare across types.
CROSS JOIN generate_series(
    'start_date'::timestamp,
    'end_date'::timestamp,
    interval '1 minute'
) AS gs(ts)
LEFT JOIN channel1m AS ch1m
    ON ch1m.itemId = it.itemId
    AND ch1m.ts = gs.ts
-- Without ORDER BY the row order is unspecified
ORDER BY gs.ts, it.itemId;
将 'start_date'、'end_date' 替换为所需的值,或从子查询中获取它们。
此查询:
1)通过 CROSS JOIN 生成所有 itemId 与时间点的组合;
2)通过 LEFT JOIN 取得每个组合对应的 value(没有匹配行时为 NULL)。
答案 2 :(得分:1)
我认为最具可读性的方法是构建一系列公用表表达式(CTE)。分钟与 item ID 之间的交叉连接会给出所有的组合。
-- Build every (minute, item) combination for a fixed window, then left-join the
-- stored readings so that missing combinations appear with a NULL value.
with all_minutes as (
    -- Offsets 0..10 give eleven consecutive minutes starting at the literal
    select timestamp '2012-12-03 15:29' + offsets.n * interval '1 minute' as ts
    from generate_series(0, 10) as offsets(n)
),
item_ids as (
    -- Each item id once
    select itemid
    from channel1m
    group by itemid
),
all_items_and_minutes as (
    select
        m.ts,
        i.itemid
    from all_minutes as m
    cross join item_ids as i
)
select
    slots.ts,
    slots.itemid,
    readings.value
from all_items_and_minutes as slots
left join channel1m as readings
    on readings.ts = slots.ts
    and readings.itemid = slots.itemid
order by slots.ts, slots.itemid;
您可以使用SELECT语句替换时间戳文字,以获得所需的实际范围。如果您有一个包含所有唯一商品ID号的其他表,那么可能最好从那个表中选择,而不是从channel1m表中选择不同的值。
答案 3 :(得分:0)
1. 使用 time_bucket 或 date_trunc 创建时间桶的边界。
2. 使用 generate_series 为您的时间范围生成空的时间桶。
3. 使用 UNION 将空时间桶数据集与您的数据合并。
4. 使用 DISTINCT ON 在每个时间桶中只保留一行,优先保留带有数据的行。
示例:
-- Distinct result values per 5-minute bucket over the last day, with a zero row
-- for every bucket that has no data.
-- NOTE(review): time_bucket is not a core PostgreSQL function; presumably it
-- comes from TimescaleDB. Confirm it is installed.
WITH
timeseries_data AS (
    -- Buckets that contain data
    SELECT
        time_bucket(interval '5 minutes', started_at) AS time_bucket_start,
        count(DISTINCT v1.value) AS unique_row_count
    FROM
        probe_execution AS pe1
    CROSS JOIN LATERAL (
        SELECT value
        FROM jsonb_array_elements(pe1.result)
    ) AS v1
    WHERE
        pe1.probe_id = 8 AND
        pe1.result_count > 0 AND
        pe1.started_at > now() - interval '1 day' AND
        pe1.ended_at < now()
    GROUP BY time_bucket_start
    UNION
    -- Zero-count filler row for every bucket in the window.
    -- The step must equal the bucket width: a smaller step emits rows at
    -- timestamps that are not bucket boundaries.
    SELECT
        gs1 AS time_bucket_start,
        0 AS unique_row_count
    FROM
        generate_series(
            time_bucket(interval '5 minutes', now() - interval '1 day'),
            time_bucket(interval '5 minutes', now()),
            interval '5 minutes'
        ) AS gs1
)
-- Keep one row per bucket; the descending count makes a data row win over
-- the zero filler row.
SELECT DISTINCT ON (td1.time_bucket_start)
    td1.time_bucket_start,
    td1.unique_row_count
FROM timeseries_data AS td1
ORDER BY td1.time_bucket_start, td1.unique_row_count DESC;