基表
-- Base table: one row per session event.
CREATE TABLE IF NOT EXISTS test_sessions
(
    session_id   UInt64,    -- session the event belongs to
    session_name String,    -- event label; the sample data uses 'start' and 'stop'
    created_at   DateTime   -- time of the event
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(created_at)   -- one partition per calendar month
ORDER BY session_id;
有以下数据
-- Seed data: session 1 has a 'start' and a 'stop' event, session 2 only a 'start'.
INSERT INTO test_sessions
    (session_id, session_name, created_at)
VALUES
(1, 'start', '2021-01-31 00:00:00'),
(1, 'stop', '2021-01-31 01:00:00'),
(2, 'start', '2021-01-31 01:00:00');
创建了一个物化视图和一个普通视图,用于汇总出已关闭(同时有 start 和 stop 记录)的会话:
-- Per-session aggregate states built from test_sessions:
--   started_at: minIf state over created_at for rows where session_name = 'start'
--   stopped_at: maxIf state over created_at for rows where session_name = 'stop'
-- The columns hold intermediate states; readers finalize them with
-- minIfMerge / maxIfMerge.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_session_aggregate_states
(
    session_id UInt64,
    started_at AggregateFunction(minIf, DateTime, UInt8),
    stopped_at AggregateFunction(maxIf, DateTime, UInt8)
)
ENGINE = AggregatingMergeTree
PARTITION BY tuple()    -- single, unpartitioned data set
ORDER BY session_id
POPULATE                -- also process rows already present in test_sessions
AS
SELECT
    session_id,
    minIfState(created_at, session_name = 'start') AS started_at,
    maxIfState(created_at, session_name = 'stop')  AS stopped_at
FROM test_sessions
GROUP BY session_id;
-- Completed sessions: finalizes the aggregate states per session and keeps
-- only sessions whose start and stop timestamps both differ from the zero
-- DateTime literal.
-- NOTE(review): the zero DateTime is presumably what the -If aggregates yield
-- when no row matched the condition; confirm on the target server version.
CREATE VIEW IF NOT EXISTS test_session_completed
(
    session_id UInt64,
    started_at DateTime,
    stopped_at DateTime
)
AS
SELECT
    session_id,
    minIfMerge(started_at) AS started_at,
    maxIfMerge(stopped_at) AS stopped_at
FROM test_session_aggregate_states
GROUP BY session_id
HAVING NOT (
    started_at = '0000-00-00 00:00:00'
    OR stopped_at = '0000-00-00 00:00:00'
);
工作正常:返回 1 行,即同时存在 start 和 stop 记录的会话
-- Sanity check of the completed-sessions view.
-- Columns are listed explicitly and rows are ordered by session_id so the
-- output can be compared against the recorded result below.
SELECT
    session_id,
    started_at,
    stopped_at
FROM test_session_completed
ORDER BY session_id;
-- Recorded result:
-- 1,2021-01-31 00:00:00,2021-01-31 01:00:00
尝试基于 test_session_completed 创建物化视图,并与其他表做 JOIN(本示例中省略了 JOIN):
-- Materialized view intended to hold the ids of completed sessions, sourced
-- from the test_session_completed view.
-- NOTE(review): the test queries later in this file show that sessions
-- inserted after creation do not appear here; only the rows captured by
-- POPULATE are present. Presumably this is because the source is a plain view
-- that never receives inserts itself; confirm against the ClickHouse
-- materialized-view documentation.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv
(
    session_id UInt64
)
ENGINE = MergeTree
PARTITION BY tuple()
ORDER BY session_id
POPULATE
AS
SELECT
    session_id
FROM test_session_completed;
编写测试查询来测试 test_mv
-- Test data: session 3 gets its 'start' and 'stop' events in a single insert.
INSERT INTO test_sessions
    (session_id, session_name, created_at)
VALUES
(3, 'start', '2021-01-31 02:00:00'),
(3, 'stop', '2021-01-31 03:00:00');
-- Check the completed-sessions view after inserting session 3.
-- Columns are listed explicitly and rows are ordered by session_id so the
-- output is deterministic and comparable with the recorded result.
SELECT
    session_id,
    started_at,
    stopped_at
FROM test_session_completed
ORDER BY session_id;
-- SUCCESS
-- 1,2021-01-31 00:00:00,2021-01-31 01:00:00
-- 3,2021-01-31 02:00:00,2021-01-31 03:00:00
-- Check test_mv after inserting session 3.
-- The single column is named explicitly and rows are ordered by session_id so
-- actual and expected output can be compared line by line.
SELECT
    session_id
FROM test_mv
ORDER BY session_id;
-- FAILURE (actual result)
-- 1
-- EXPECTED RESULT
-- 1
-- 3
如何根据 test_session_completed 来填充 test_mv?
ClickHouse 版本:20.11.4.13
答案 0 :(得分:1)
物化视图(MV)本质上是插入触发器:它只处理每次写入源表的那一批新数据,不会重新读取源表中已有的数据。因此,在处理 completed(stop)事件的插入时,不可能获得 started 状态。如果您不需要检查 started 发生在 completed 之前,那么您可以制作更简单的 MV,只需检查 where completed。另外,您不需要 minIfState,您可以使用 min(SimpleAggregateFunction)。它将减少存储的数据并提高性能。检查这个: https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ&t=9371
我会这样做:
-- Base table: one row per session event ('start' / 'stop').
CREATE TABLE IF NOT EXISTS test_sessions
(
    session_id   UInt64,
    session_name String,
    created_at   DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY (session_id);

-- Per-session start/stop timestamps kept as plain values
-- (SimpleAggregateFunction) instead of aggregate-function states.
--
-- The -OrNull combinator makes the conditional aggregates return NULL when an
-- insert contains no matching row for a session. A plain minIf would return
-- the zero DateTime in that case, and because zero is the smallest DateTime it
-- would win every later `min` merge. A session whose 'start' and 'stop' arrive
-- in separate inserts would then end up with started_at = 0 and never be
-- reported as completed. NULLs are skipped by min/max, so partial rows merge
-- correctly.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_session_aggregate_states
(
    session_id UInt64,
    started_at SimpleAggregateFunction(min, Nullable(DateTime)),
    stopped_at SimpleAggregateFunction(max, Nullable(DateTime))
)
ENGINE = AggregatingMergeTree
PARTITION BY tuple()
ORDER BY (session_id)
POPULATE
AS
SELECT
    session_id,
    minOrNullIf(created_at, session_name = 'start') AS started_at,
    maxOrNullIf(created_at, session_name = 'stop')  AS stopped_at
FROM test_sessions
GROUP BY session_id;

-- Test data: session 3 with both events.
INSERT INTO test_sessions (session_id, session_name, created_at) VALUES
(3, 'start', '2021-01-31 02:00:00'),
(3, 'stop', '2021-01-31 03:00:00');

-- completed sessions:
-- Rows of one session may still be spread over several unmerged parts, so the
-- values are re-aggregated at query time. A session is completed when both a
-- start and a stop timestamp are known.
SELECT
    session_id,
    min(started_at) AS started_at,
    max(stopped_at) AS stopped_at
FROM test_session_aggregate_states
GROUP BY session_id
HAVING started_at IS NOT NULL
   AND stopped_at IS NOT NULL
ORDER BY session_id;

-- Output as recorded in the original answer:
-- ┌─session_id─┬──────────started_at─┬──────────stopped_at─┐
-- │          1 │ 2021-01-31 00:00:00 │ 2021-01-31 01:00:00 │
-- └────────────┴─────────────────────┴─────────────────────┘
-- NOTE(review): session 3, inserted above, would be expected to appear as
-- well; re-run to confirm the current output.
另外,使用 argMaxState 您可以在同一个 session_id 内聚合多组 start / stop。