我有两个表,由Django ORM生成 - core_instauser
和core_instauser_followers
。请参阅下面的CREATE TABLE语句
查询获取关注者几个帐户并按某些列(counts_followed_by)排序超过30秒:
# SELECT
# T3."id"
# FROM "core_instauser_followers"
# INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE "core_instauser_followers"."from_instauser_id" IN (14275, 30533081)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
id
--------
23358
17461
34360
34201
30624
12475
306799
19215
21042
27073
(10 rows)
Time: 32850.160 ms
但是如果添加条件而不是更改结果,查询只需0.3秒 - 在100中更快:
# SELECT
# T3."id"
# FROM "core_instauser_followers"
# INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE ("core_instauser_followers"."from_instauser_id" IN (14275, 30533081) AND T3."count_media" > 0 AND
# T3."counts_follows" > -1 AND T3."counts_followed_by" > -1)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
id
--------
23358
17461
34360
34201
30624
12475
306799
19215
21042
27073
(10 rows)
Time: 295.934 ms
表中的所有列都有索引。
为什么会这样?
用于创建表的SQL:
-- core_instauser
CREATE TABLE core_instauser (
id integer NOT NULL,
uid character varying(100) NOT NULL,
username character varying(100) NOT NULL,
full_name character varying(100) NOT NULL,
profile_picture character varying(255) NOT NULL,
counts_followed_by integer,
counts_follows integer,
count_media integer,
owner_id integer,
hidden boolean NOT NULL,
loaded boolean NOT NULL,
update_time timestamp with time zone,
has_avatar boolean,
follow_rate double precision,
deleted boolean NOT NULL,
bio text NOT NULL,
count_loaded_followers integer NOT NULL,
has_bio boolean,
has_full_name boolean,
has_website boolean,
website text NOT NULL
);
CREATE SEQUENCE core_instauser_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER TABLE ONLY core_instauser ALTER COLUMN id SET DEFAULT nextval('core_instauser_id_seq'::regclass);
ALTER TABLE ONLY core_instauser
ADD CONSTRAINT core_instauser_pkey PRIMARY KEY (id);
ALTER TABLE ONLY core_instauser
ADD CONSTRAINT core_instauser_uid_key UNIQUE (uid);
CREATE INDEX core_instauser_count_media_480f209b0ba2dbd4_uniq ON core_instauser USING btree (count_media);
CREATE INDEX core_instauser_counts_followed_by_33a853f6d98098dc_uniq ON core_instauser USING btree (counts_followed_by);
CREATE INDEX core_instauser_counts_follows_66136283704427b2_uniq ON core_instauser USING btree (counts_follows);
-- core_instauser_followers
CREATE TABLE core_instauser_followers (
id integer NOT NULL,
from_instauser_id integer NOT NULL,
to_instauser_id integer NOT NULL
);
CREATE SEQUENCE core_instauser_followers_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER TABLE ONLY core_instauser_followers ALTER COLUMN id SET DEFAULT nextval('core_instauser_followers_id_seq'::regclass);
ALTER TABLE ONLY core_instauser_followers
ADD CONSTRAINT core_instauser_followers_from_instauser_id_to_instauser_id_key UNIQUE (from_instauser_id, to_instauser_id);
ALTER TABLE ONLY core_instauser_followers
ADD CONSTRAINT core_instauser_followers_pkey PRIMARY KEY (id);
CREATE INDEX core_instauser_followers_f865d5f5 ON core_instauser_followers USING btree (from_instauser_id);
CREATE INDEX core_instauser_followers_f9b32b2c ON core_instauser_followers USING btree (to_instauser_id);
ALTER TABLE ONLY core_instauser_followers
ADD CONSTRAINT core_in_from_instauser_id_2ac1cc9fc9c44a79_fk_core_instauser_id FOREIGN KEY (from_instauser_id) REFERENCES core_instauser(id) DEFERRABLE INITIALLY DEFERRED;
ALTER TABLE ONLY core_instauser_followers
ADD CONSTRAINT core_inst_to_instauser_id_4236828dfe87cfb8_fk_core_instauser_id FOREIGN KEY (to_instauser_id) REFERENCES core_instauser(id) DEFERRABLE INITIALLY DEFERRED;
解释查询:
没有附加条件的第一个慢查询
# EXPLAIN ANALYZE SELECT
# T3."id"
# FROM "core_instauser_followers"
# INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE "core_instauser_followers"."from_instauser_id" IN (14275, 30533081)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
Limit (cost=1.13..32396.65 rows=10 width=8) (actual time=37561.457..37683.384 rows=10 loops=1)
-> Nested Loop (cost=1.13..48956112.71 rows=15112 width=8) (actual time=37561.455..37683.369 rows=10 loops=1)
-> Index Scan Backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3 (cost=0.56..4942183.77 rows=31451512 width=8) (actual time=0.066..4153.129 rows=4492685 loops=1)
-> Index Only Scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers (cost=0.57..1.39 rows=1 width=4) (actual time=0.006..0.006 rows=0 loops=4492685)
Index Cond: ((from_instauser_id = ANY ('{14275,30533081}'::integer[])) AND (to_instauser_id = t3.id))
Heap Fetches: 10
Total runtime: 37683.475 ms
(7 rows)
使用附加条件进行快速查询
# EXPLAIN ANALYZE SELECT
# T3."id"
# FROM "core_instauser_followers"
# INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE ("core_instauser_followers"."from_instauser_id" IN (14275, 30533081) AND T3."count_media" > -1 AND
# T3."counts_follows" > -1 AND T3."counts_followed_by" > -1)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
Limit (cost=1.13..36969.96 rows=10 width=8) (actual time=24.635..222.119 rows=10 loops=1)
-> Nested Loop (cost=1.13..35453106.76 rows=9590 width=8) (actual time=24.633..222.100 rows=10 loops=1)
-> Index Scan Backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3 (cost=0.56..5029740.19 rows=19958436 width=8) (actual time=0.037..60.866 rows=13387 loops=1)
Index Cond: (counts_followed_by > (-1))
Filter: ((count_media > (-1)) AND (counts_follows > (-1)))
-> Index Only Scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers (cost=0.57..1.51 rows=1 width=4) (actual time=0.009..0.009 rows=0 loops=13387)
Index Cond: ((from_instauser_id = ANY ('{14275,30533081}'::integer[])) AND (to_instauser_id = t3.id))
Heap Fetches: 10
Total runtime: 222.208 ms
(9 rows)
第二个查询中的所有已过滤列(count_media,counts_follows,counts_followed_by)的值大于或等于0,因此新条件不得影响最终结果
# SELECT count(*)
FROM core_instauser
WHERE counts_followed_by < 0 OR count_media < 0 OR counts_follows < 0;
count
-------
0
(1 row)
Time: 5.551 ms
答案 0 :(得分:1)
当您添加谓词时,您告诉优化器您只对特定行感兴趣(core_instauser)。
旧查询必须匹配core_instauser中的每一行。对于每一行= 4492685发现它必须检查另一个表格的匹配。
新查询将core_instauser表限制为仅获取rows = 13387,并且必须仅搜索其他表13387次。
在谓词中添加时,您将获得相同数量的行或小于实际表的行数。
现在问你为什么不同的查询获得相同的结果。 在您的情况下,恰好只有core_instauser表中的那些行与core_instauser_followers匹配。答案可能不同。
我们可以将它与两袋带有数字的球进行比较。 一个包有1,2和3。 另一个包有1和2.
现在你加入行李来获得一场比赛。
Select * from bag1 join bag2 on (number) will join two rows: 1 and 2.
现在你限制bag1以获得所有小于3的球:
select * from bag1 join bag2 where bag1.number < 3.
The result will not change.