我把loads表从Postgres迁移到了Clickhouse,以便之后在pg中创建FOREIGN TABLE和MATERIALIZED VIEW。但是,当我尝试按created_at分组时,得到了奇怪的结果,就好像某些日期完全没有任何loads记录:
# Clickhouse
-- ClickHouse table holding the migrated rows.
-- Partitions are keyed by toYYYYMM(created_at); rows are sorted by created_at.
CREATE TABLE loads
(
    country_id UInt16,
    created_at Date
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(created_at)
ORDER BY created_at;
# Pg
-- Install the ClickHouse foreign data wrapper in this Postgres database.
CREATE EXTENSION clickhousedb_fdw;

-- Foreign server definition pointing at the ClickHouse instance via the ODBC driver.
CREATE SERVER clickhouse_svr
    FOREIGN DATA WRAPPER clickhousedb_fdw
    OPTIONS (
        dbname 'db',
        driver '/usr/local/lib/odbc/libclickhouseodbc.so',
        host '127.0.0.1'
    );

-- Credentials the current Postgres role uses when connecting to that server.
CREATE USER MAPPING FOR CURRENT_USER
    SERVER clickhouse_svr
    OPTIONS (user 'default', password 'pwd');

-- Postgres-side foreign table exposing the two columns.
-- NOTE(review): no remote table name is specified here, and the ClickHouse DDL
-- shown in this post creates "loads", not "dwh_loads" -- confirm which remote
-- table this actually resolves to.
CREATE FOREIGN TABLE dwh_loads
(
    country_id int,
    created_at date
)
SERVER clickhouse_svr;
因此,在Clickhouse上分组查询返回的结果是正常的,但通过pg外部表查询得到的结果却很奇怪,缺少了若干天的数据:
# Clickhouse
-- Row count per created_at value for dates after 2019-08-10, run in ClickHouse.
-- NOTE(review): the ClickHouse DDL in this post names the table "loads";
-- confirm that "dwh_loads" is the table that was actually queried.
SELECT count(*), created_at
FROM dwh_loads
WHERE created_at > '2019-08-10'
GROUP BY created_at
ORDER BY created_at ASC
┌─count()─┬──────created_at─┐
│ 1987751 │ 2019-08-11 │
│ 2225018 │ 2019-08-12 │
│ 4098923 │ 2019-08-13 │
│ 7453111 │ 2019-08-14 │
│ 6787449 │ 2019-08-15 │
│ 6396884 │ 2019-08-16 │
│ 5642157 │ 2019-08-17 │
│ 5485166 │ 2019-08-18 │
│ 4949855 │ 2019-08-19 │
│ 4968256 │ 2019-08-20 │
│ 4583210 │ 2019-08-21 │
│ 4781908 │ 2019-08-22 │
│ 4449216 │ 2019-08-23 │
│ 4911274 │ 2019-08-24 │
│ 205194 │ 2019-08-25 │
└─────────┴─────────────────┘
# Pg
-- Same per-date row count, issued from Postgres against the foreign table.
SELECT
    count(*),
    created_at
FROM dwh_loads
WHERE created_at > '2019-08-10'
GROUP BY created_at
ORDER BY created_at;
count | created_at_date
---------+-----------------
1752400 | 2019-08-11
3604480 | 2019-08-16
1637314 | 2019-08-18
823 | 2019-08-24
但是,如果我只查询某个确切的日期,pg会返回正确的结果:
where created_at_date = '2019-08-20'
返回4968256。