select version()
:PostgreSQL 9.6.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-16), 64-bit
-- S2 cells belonging to each covering. s2_cell_min and s2_cell_max are the
-- inclusive bounds that the queries in this file compare against
-- tiles.tileset_segment_counts.s2_cell_id.
create table if not exists cover.covering_s2_cell_ids (
    covering_id int references cover.coverings(id) on delete cascade,
    s2_cell_id bigint,
    s2_cell_min bigint,
    s2_cell_max bigint,
    primary key (covering_id, s2_cell_id)
);
-- Per-tileset, per-S2-cell segment payload (jsonb) together with a count
-- column (num) that the queries in this file filter on.
create table if not exists tiles.tileset_segment_counts (
    tileset_id int references tiles.tilesets(id) on delete cascade,
    s2_cell_id bigint not null,
    segments jsonb not null,
    num int not null,
    primary key (tileset_id, s2_cell_id)
);
此外,我在 tiles.tileset_segment_counts(tileset_id, s2_cell_id, num) 上建有一个索引。
-- Segment rows of tileset 1 whose cell id lies inside any [s2_cell_min,
-- s2_cell_max] range of covering 2, restricted to rows with num above 100.
SELECT
    tsc.s2_cell_id,
    tsc.segments
FROM cover.covering_s2_cell_ids AS cs
INNER JOIN tiles.tileset_segment_counts AS tsc
    ON tsc.s2_cell_id >= cs.s2_cell_min
    AND tsc.s2_cell_id <= cs.s2_cell_max
WHERE cs.covering_id = 2
    AND tsc.tileset_id = 1
    AND tsc.num > 100;
Nested Loop (cost=0.71..1285012.81 rows=7919778 width=544) (actual time=0.039..20.139 rows=19305 loops=1)
Output: tsc.s2_cell_id, tsc.segments
Buffers: shared hit=5150
-> Index Scan using covering_s2_cell_ids_covering_id_idx on cover.covering_s2_cell_ids cs (cost=0.29..12.04 rows=157 width=16) (actual time=0.018..0.088 rows=157 loops=1)
Output: cs.covering_id, cs.s2_cell_id, cs.s2_cell_min, cs.s2_cell_max
Index Cond: (cs.covering_id = 2)
Buffers: shared hit=4
-> Index Scan using tileset_segment_counts_tileset_id_s2_cell_id_num_idx on tiles.tileset_segment_counts tsc (cost=0.43..7680.28 rows=50444 width=544) (actual time=0.006..0.075 rows=123 loops=157)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Index Cond: ((tsc.tileset_id = 1) AND (tsc.s2_cell_id >= cs.s2_cell_min) AND (tsc.s2_cell_id <= cs.s2_cell_max) AND (tsc.num > 100))
Buffers: shared hit=5146
Planning time: 0.156 ms
Execution time: 23.760 ms
Nested Loop (cost=0.29..405447.61 rows=1624688 width=1111) (actual time=4656.731..6367.624 rows=6185 loops=1)
Output: tsc.s2_cell_id, tsc.segments
Join Filter: ((tsc.s2_cell_id >= cs.s2_cell_min) AND (tsc.s2_cell_id <= cs.s2_cell_max))
Rows Removed by Join Filter: 14430122
Buffers: shared hit=142735
-> Seq Scan on tiles.tileset_segment_counts tsc (cost=0.00..149546.77 rows=93135 width=1111) (actual time=0.119..214.902 rows=91951 loops=1)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Filter: ((tsc.num > 1000) AND (tsc.tileset_id = 1))
Rows Removed by Filter: 362013
Buffers: shared hit=142731
-> Materialize (cost=0.29..12.82 rows=157 width=16) (actual time=0.000..0.030 rows=157 loops=91951)
Output: cs.s2_cell_min, cs.s2_cell_max
Buffers: shared hit=4
-> Index Scan using covering_s2_cell_ids_covering_id_idx on cover.covering_s2_cell_ids cs (cost=0.29..12.04 rows=157 width=16) (actual time=0.015..0.052 rows=157 loops=1)
Output: cs.s2_cell_min, cs.s2_cell_max
Index Cond: (cs.covering_id = 2)
Buffers: shared hit=4
Planning time: 0.152 ms
Execution time: 6368.822 ms
我的想法是,在这种情况下,规划器应该更倾向于对 tileset_segment_counts 使用索引扫描,因为它返回的行更少。我确认在创建索引后已对表执行了 VACUUM ANALYZE。任何想法都将不胜感激。我不明白为什么加上这个限制性更强的谓词,反而会促使规划器选择 Join Filter + 顺序扫描,而不是使用索引条件。
- 编辑 -
在[原文此处缺失:被关闭的规划器设置名称]的情况下,查询会使用索引扫描或位图堆扫描,并且运行很快(下面是 num > 1000 时的计划):
Nested Loop (cost=1010.05..744541.04 rows=1624688 width=1111) (actual time=0.048..8.272 rows=6185 loops=1)
Output: tsc.s2_cell_id, tsc.segments
Buffers: shared hit=2353
-> Index Scan using covering_s2_cell_ids_covering_id_idx on cover.covering_s2_cell_ids cs (cost=0.29..12.04 rows=157 width=16) (actual time=0.018..0.076 rows=157 loops=1)
Output: cs.covering_id, cs.s2_cell_id, cs.s2_cell_min, cs.s2_cell_max
Index Cond: (cs.covering_id = 2)
Buffers: shared hit=4
-> Bitmap Heap Scan on tiles.tileset_segment_counts tsc (cost=1009.77..4638.74 rows=10348 width=1111) (actual time=0.013..0.035 rows=39 loops=157)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Recheck Cond: ((tsc.tileset_id = 1) AND (tsc.s2_cell_id >= cs.s2_cell_min) AND (tsc.s2_cell_id <= cs.s2_cell_max) AND (tsc.num > 1000))
Heap Blocks: exact=1688
Buffers: shared hit=2349
-> Bitmap Index Scan on tileset_segment_counts_tileset_id_s2_cell_id_num_idx (cost=0.00..1007.18 rows=10348 width=0) (actual time=0.011..0.011 rows=39 loops=157)
Index Cond: ((tsc.tileset_id = 1) AND (tsc.s2_cell_id >= cs.s2_cell_min) AND (tsc.s2_cell_id <= cs.s2_cell_max) AND (tsc.num > 1000))
Buffers: shared hit=661
Planning time: 0.156 ms
Execution time: 9.492 ms
如果将[原文此处缺失:规划器设置名称]设置为关闭并保留顺序扫描,情况也是如此。但是,仅设置 enable_seqscan = OFF 时,计划中对 tileset_segment_counts 的扫描如下:
-> Bitmap Heap Scan on tiles.tileset_segment_counts tsc (cost=43731.56..178253.16 rows=93135 width=1111) (actual time=41.738..105.038 rows=91951 loops=1)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Recheck Cond: ((tsc.tileset_id = 1) AND (tsc.num > 1000))
Heap Blocks: exact=28833
Buffers: shared hit=38069
-> Bitmap Index Scan on tileset_segment_counts_tileset_id_s2_cell_id_num_idx (cost=0.00..43708.27 rows=93135 width=0) (actual time=36.765..36.765 rows=91951 loops=1)
Index Cond: ((tsc.tileset_id = 1) AND (tsc.num > 1000))
Buffers: shared hit=9236
- 编辑#2 -
更新了第一次编辑中的第一个查询计划,以使num > 1000
-- Variant of the query that reaches the covering through table d
-- (d.default_covering_id) instead of filtering cs.covering_id directly.
-- The tileset_id and num conditions are part of the second join's ON clause.
-- NOTE(review): the plan recorded for this query filters d on "= 2", but no
-- such predicate appears in this text; the query as posted looks truncated.
SELECT tsc.s2_cell_id, tsc.segments
from d
join cover.covering_s2_cell_ids cs on d.default_covering_id = cs.covering_id
JOIN tiles.tileset_segment_counts tsc on tsc.s2_cell_id BETWEEN cs.s2_cell_min AND cs.s2_cell_max
and tsc.tileset_id = 1
and num > 1000
Nested Loop (cost=0.71..99387.70 rows=1021667 width=1108) (actual time=0.033..7.860 rows=6185 loops=1)
Output: tsc.s2_cell_id, tsc.segments
Buffers: shared hit=2524
-> Nested Loop (cost=0.29..147.85 rows=100 width=16) (actual time=0.017..0.162 rows=157 loops=1)
Output: cs.s2_cell_min, cs.s2_cell_max
Buffers: shared hit=8
-> Seq Scan on d (cost=0.00..5.56 rows=1 width=4) (actual time=0.008..0.029 rows=1 loops=1)
Output:,, d.neilsen_id, d.market_key, d.default_covering_id, d.enabled
Filter: ( = 2)
Rows Removed by Filter: 204
Buffers: shared hit=3
-> Index Scan using covering_s2_cell_ids_pkey on cover.covering_s2_cell_ids cs (cost=0.29..141.29 rows=100 width=20) (actual time=0.006..0.072 rows=157 loops=1)
Output: cs.covering_id, cs.s2_cell_id, cs.s2_cell_min, cs.s2_cell_max
Index Cond: (cs.covering_id = d.default_covering_id)
Buffers: shared hit=5
-> Index Scan using tileset_segment_counts_tileset_id_s2_cell_id_num_idx on tiles.tileset_segment_counts tsc (cost=0.42..890.23 rows=10217 width=1108) (actual time=0.004..0.031 rows=39 loops=157)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Index Cond: ((tsc.tileset_id = 1) AND (tsc.s2_cell_id >= cs.s2_cell_min) AND (tsc.s2_cell_id <= cs.s2_cell_max) AND (tsc.num > 1000))
Buffers: shared hit=2516
Planning time: 0.416 ms
Execution time: 9.010 ms
- 编辑#3 -
在 cover.covering_s2_cell_ids(covering_id, s2_cell_min, s2_cell_max) 上添加索引后,得到如下计划:
Nested Loop (cost=0.29..267283.27 rows=1602272 width=1107) (actual time=68.918..411.856 rows=6185 loops=1)
Output: tsc.s2_cell_id, tsc.segments
Buffers: shared hit=340825 read=3810
-> Seq Scan on tiles.tileset_segment_counts tsc (cost=0.00..149528.89 rows=91850 width=1107) (actual time=0.010..213.931 rows=91951 loops=1)
Output: tsc.tileset_id, tsc.s2_cell_id, tsc.segments, tsc.num
Filter: ((tsc.num > 1000) AND (tsc.tileset_id = 1))
Rows Removed by Filter: 362013
Buffers: shared hit=138924 read=3807
-> Index Only Scan using covering_s2_cell_ids_covering_id_s2_cell_min_s2_cell_max_idx on cover.covering_s2_cell_ids cs (cost=0.29..1.11 rows=17 width=16) (actual time=0.002..0.002 rows=0 loops=91951)
Output: cs.covering_id, cs.s2_cell_min, cs.s2_cell_max
Index Cond: ((cs.covering_id = 2) AND (cs.s2_cell_min <= tsc.s2_cell_id) AND (cs.s2_cell_max >= tsc.s2_cell_id))
Heap Fetches: 0
Buffers: shared hit=201901 read=3
Planning time: 0.372 ms
Execution time: 413.054 ms
答案 0 :(得分:0)
下面实现了 @joop 的想法:用 int8range 范围类型来表示 min/max(这样可以通过索引进行搜索):
-- Variant of covering_s2_cell_ids that stores the min/max cell bounds as a
-- single int8range column, indexed with GiST so that the containment test
-- (<@) used by the join can be answered from an index.
create table covering_s2_cell_ids (
    covering_id int -- references coverings(id) on delete cascade
    , s2_cell_id bigint
    -- , s2_cell_min bigint   (replaced by cellminmax)
    -- , s2_cell_max bigint   (replaced by cellminmax)
    , cellminmax int8range NOT NULL -- << HERE
    , primary key (covering_id, s2_cell_id)
);
CREATE INDEX ON covering_s2_cell_ids USING GIST(cellminmax); -- << HERE
-- Segment-count table for the int8range experiment; same columns as the
-- original, with the foreign key commented out so it can be created alone.
create table tileset_segment_counts (
    tileset_id int -- references tilesets(id) on delete cascade
    , s2_cell_id bigint not null
    , segments jsonb not null
    , num int not null
    , primary key (tileset_id, s2_cell_id)
);
CREATE index on tileset_segment_counts(tileset_id, s2_cell_id, num);
-- Refresh planner statistics for both tables before inspecting the plan.
VACUUM ANALYZE covering_s2_cell_ids;
VACUUM ANALYZE tileset_segment_counts;
-- Same lookup as the BETWEEN-based query, with the join expressed as range
-- containment against the int8range column.
SELECT
    tsc.s2_cell_id,
    tsc.segments
FROM covering_s2_cell_ids AS cs
INNER JOIN tileset_segment_counts AS tsc
    -- replaces: tsc.s2_cell_id BETWEEN cs.s2_cell_min AND cs.s2_cell_max
    ON tsc.s2_cell_id <@ cs.cellminmax -- << HERE
WHERE cs.covering_id = 2
    AND tsc.tileset_id = 1
    AND tsc.num > 100;
Nested Loop (cost=3.24..9.61 rows=1 width=40)
-> Bitmap Heap Scan on tileset_segment_counts tsc (cost=1.22..3.34 rows=2 width=40)
Recheck Cond: ((tileset_id = 1) AND (num > 100))
-> Bitmap Index Scan on tileset_segment_counts_tileset_id_s2_cell_id_num_idx (cost=0.00..1.22 rows=2 width=0)
Index Cond: ((tileset_id = 1) AND (num > 100))
-> Bitmap Heap Scan on covering_s2_cell_ids cs (cost=2.02..3.13 rows=1 width=32)
Recheck Cond: ((tsc.s2_cell_id <@ cellminmax) AND (covering_id = 2))
-> BitmapAnd (cost=2.02..2.02 rows=1 width=0)
-> Bitmap Index Scan on covering_s2_cell_ids_cellminmax_idx (cost=0.00..0.66 rows=6 width=0)
Index Cond: (tsc.s2_cell_id <@ cellminmax)
-> Bitmap Index Scan on covering_s2_cell_ids_pkey (cost=0.00..1.21 rows=6 width=0)
Index Cond: (covering_id = 2)
(12 rows)