实例化视图的填充不正确

时间:2020-11-10 08:13:45

标签: clickhouse

“ test_sessions”表

CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

“ test_sessions”表数据

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
| 1  | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2  | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1  | stop  | 2020-11-11 07:58:19 |
+----+-------+---------------------+

“ finished_sessions”实例化视图

CREATE MATERIALIZED VIEW finished_sessions (
    id UInt64,
    start_at DateTime,
    end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';

“ finished_sessions”实例化视图数据

SELECT * FROM finished_sessions;

+----+---------------------+---------------------+
| id | start_at            | end_at              |
+----+---------------------+---------------------+
| 1  | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+

直到这一刻,一切正常:只有1个关闭的会话

第二次会议结束后

INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

发生不正确的填充

SELECT * from finished_sessions ORDER BY id;

+----+-------------------------------+---------------------+
| id | start_at                      | end_at              |
+----+-------------------------------+---------------------+
| 1  | 2020-11-10 07:58:19           | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2  | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+

如何解决?

1 个答案:

答案 0 :(得分:1)

  1. 您应该使用AggregateFunction或更好的SimpleAggregateFunction

  2. 不可能通过AggregateFunction对表进行分区。因为AggregateFunction是在合并期间计算的,并且合并是在分区上执行的。

  3. MV是插入触发器。 https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf

    CREATE TABLE IF NOT EXISTS test_sessions (
        id UInt64,
        name String,
        created_at DateTime
    )
    ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_at)
    ORDER BY name;
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (1, 'start', now()),
    (1, 'stop',  now() + INTERVAL 1 day),
    (2, 'start',  now() + INTERVAL 1 HOUR );
    
    CREATE MATERIALIZED VIEW finished_sessions
    ENGINE = AggregatingMergeTree
    ORDER BY (id)
    POPULATE AS
    SELECT
        id, 
        minStateIf(created_at, name = 'start') AS start_at,
        maxStateIf(created_at, name = 'stop')  AS end_at
    FROM test_sessions
    GROUP BY id
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (2, 'stop', now());
    
    SELECT
        id,
        minMerge(start_at),
        maxMerge(end_at)
    FROM finished_sessions
    GROUP BY id
    
    Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
    
    ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
    │  2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
    │  1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
    └────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

CREATE MATERIALIZED VIEW finished_sessions
(
    id UInt64,
    start_at SimpleAggregateFunction(min,DateTime),
    end_at SimpleAggregateFunction(max,DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id, 
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id;


INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

optimize table finished_sessions final;


SELECT
        id,
        min(start_at),
        max(end_at)
    FROM finished_sessions
    GROUP BY id

┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
│  2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
│  1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
└────┴─────────────────────┴─────────────────────┘    
相关问题