PostgreSQL:在对应分区之间分别进行联接

时间:2019-11-29 09:00:43

标签: sql postgresql postgres-10

我正在使用 PostgreSQL 10.6。我有几张按天分区的表,每个分区只保存当天的数据。我想从这些表中查询某一天范围内的行。

-- Remove any previous copies of both partitioned tables so the script
-- can be re-run from scratch.
DROP TABLE IF EXISTS request;
DROP TABLE IF EXISTS request_identity;

-- Parent table for requests, list-partitioned by calendar day.
CREATE TABLE IF NOT EXISTS request (
    id          BIGINT NOT NULL,
    record_date DATE   NOT NULL,
    payload     TEXT   NOT NULL
)
PARTITION BY LIST (record_date);

-- Partition holding 2001-01-01, with its own index on id.
CREATE TABLE IF NOT EXISTS request_p1
    PARTITION OF request
    FOR VALUES IN ('2001-01-01');
CREATE INDEX IF NOT EXISTS i_request_p1_id
    ON request_p1 (id);

-- Partition holding 2001-01-02, with its own index on id.
CREATE TABLE IF NOT EXISTS request_p2
    PARTITION OF request
    FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_p2_id
    ON request_p2 (id);

-- Populate request with 200,000 rows: ids 1..100000 dated 2001-01-01 and
-- ids 100001..200000 dated 2001-01-02, every row with payload 'abc'.
--
-- A single set-based INSERT replaces the row-by-row PL/pgSQL loop: the same
-- rows are produced, but in one statement instead of 200,000 single-row
-- inserts. It is still a single statement, so it stays atomic.
INSERT INTO request (id, record_date, payload)
SELECT
    ids.id,
    days.record_date,
    'abc'
FROM (
    VALUES
        (DATE '2001-01-01', 1,      100000),
        (DATE '2001-01-02', 100001, 200000)
) AS days (record_date, first_id, last_id)
-- One id per integer in each day's [first_id, last_id] range.
CROSS JOIN LATERAL generate_series(days.first_id, days.last_id) AS ids (id);

-- Parent table for request identities, list-partitioned by calendar day.
-- parent_id is matched against request.id by the queries in this script.
CREATE TABLE IF NOT EXISTS request_identity (
    record_date    DATE   NOT NULL,
    parent_id      BIGINT NOT NULL,
    identity_name  VARCHAR(32),
    identity_value VARCHAR(32)
)
PARTITION BY LIST (record_date);

-- Partition holding 2001-01-01, indexed on (identity_name, identity_value).
CREATE TABLE IF NOT EXISTS request_identity_p1
    PARTITION OF request_identity
    FOR VALUES IN ('2001-01-01');
CREATE INDEX IF NOT EXISTS i_request_identity_p1_payload
    ON request_identity_p1 (identity_name, identity_value);

-- Partition holding 2001-01-02, indexed on (identity_name, identity_value).
CREATE TABLE IF NOT EXISTS request_identity_p2
    PARTITION OF request_identity
    FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_identity_p2_payload
    ON request_identity_p2 (identity_name, identity_value);

-- Populate request_identity with one 'NAME' identity per request:
-- parent_id 1..100000 dated 2001-01-01 and 100001..200000 dated 2001-01-02,
-- with identity_value 'somename<parent_id>'.
--
-- A single set-based INSERT replaces the row-by-row PL/pgSQL loop: the same
-- rows are produced, but in one statement instead of 200,000 single-row
-- inserts. It is still a single statement, so it stays atomic.
INSERT INTO request_identity (parent_id, record_date, identity_name, identity_value)
SELECT
    ids.id,
    days.record_date,
    'NAME',
    'somename' || ids.id
FROM (
    VALUES
        (DATE '2001-01-01', 1,      100000),
        (DATE '2001-01-02', 100001, 200000)
) AS days (record_date, first_id, last_id)
-- One parent_id per integer in each day's [first_id, last_id] range.
CROSS JOIN LATERAL generate_series(days.first_id, days.last_id) AS ids (id);

-- Refresh planner statistics for both tables after the bulk load.
ANALYZE request;
ANALYZE request_identity;

当我只查询 1 天的数据时,得到了一个不错的执行计划:

-- Single-day case: requests dated 2001-01-01 that have a 'NAME' identity
-- with value 'somename555', also dated 2001-01-01. At most 100 rows.
EXPLAIN ANALYZE
SELECT *
FROM request
WHERE request.record_date >= '2001-01-01'
  AND request.record_date <= '2001-01-01'
  AND EXISTS (
        SELECT *
        FROM request_identity
        WHERE request_identity.parent_id = request.id
          AND request_identity.identity_name = 'NAME'
          AND request_identity.identity_value = 'somename555'
          AND request_identity.record_date >= '2001-01-01'
          AND request_identity.record_date <= '2001-01-01'
      )
LIMIT 100;

Limit  (cost=8.74..16.78 rows=1 width=16)
  ->  Nested Loop  (cost=8.74..16.78 rows=1 width=16)
        ->  HashAggregate  (cost=8.45..8.46 rows=1 width=8)
              Group Key: request_identity_p1.parent_id
              ->  Append  (cost=0.42..8.44 rows=1 width=8)
                    ->  Index Scan using i_request_identity_p1_payload on request_identity_p1  (cost=0.42..8.44 rows=1 width=8)
                          Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename555'::text))
                          Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))
        ->  Append  (cost=0.29..8.32 rows=1 width=16)
              ->  Index Scan using i_request_p1_id on request_p1  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))

但是,如果查询范围是 2 天或更长,PostgreSQL 会先把 request_identity 所有分区的行追加(Append)在一起,再把 request 所有分区的行追加在一起,然后才对两者做联接。下面就是没有按我期望的方式执行的 SQL:

-- Two-day case: same lookup as the single-day query, but both date filters
-- span 2001-01-01..2001-01-02 and the identity value is 'somename1777'.
--
-- NOTE(review): the subquery is correlated with request on parent_id only,
-- not on record_date. PostgreSQL 11+ documents partition-wise join
-- (enable_partitionwise_join) as requiring join conditions on the partition
-- key; confirm whether adding
-- request_identity.record_date = request.record_date is valid for this data.
EXPLAIN ANALYZE
SELECT *
FROM request
WHERE request.record_date >= '2001-01-01'
  AND request.record_date <= '2001-01-02'
  AND EXISTS (
        SELECT *
        FROM request_identity
        WHERE request_identity.parent_id = request.id
          AND request_identity.identity_name = 'NAME'
          AND request_identity.identity_value = 'somename1777'
          AND request_identity.record_date >= '2001-01-01'
          AND request_identity.record_date <= '2001-01-02'
      )
LIMIT 100;

Limit  (cost=17.19..50.21 rows=2 width=16)
  ->  Nested Loop  (cost=17.19..50.21 rows=2 width=16)
        ->  Unique  (cost=16.90..16.91 rows=2 width=8)
              ->  Sort  (cost=16.90..16.90 rows=2 width=8)
                    Sort Key: request_identity_p1.parent_id
                    ->  Append  (cost=0.42..16.89 rows=2 width=8)
                          ->  Index Scan using i_request_identity_p1_payload on request_identity_p1  (cost=0.42..8.44 rows=1 width=8)
                                Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
                                Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
                          ->  Index Scan using i_request_identity_p2_payload on request_identity_p2  (cost=0.42..8.44 rows=1 width=8)
                                Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
                                Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
        ->  Append  (cost=0.29..16.63 rows=2 width=16)
              ->  Index Scan using i_request_p1_id on request_p1  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
              ->  Index Scan using i_request_p2_id on request_p2  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))

在我的场景中,相互匹配的行只会出现在同一天的分区组内,因此没有必要(通过嵌套循环)对这些追加后的结果做联接。

对我来说,理想的结果是 PostgreSQL 先分别联接 request_p1 与 request_identity_p1、request_p2 与 request_identity_p2,然后再对各联接结果做追加。

问题是:

是否有办法让联接在同一天的分区组内、在对应的分区之间分别执行?

谢谢。

0 个答案:

没有答案