为什么在PostgreSQL中添加查询条件有时反而会加快查询?

时间:2016-07-28 13:19:11

标签: sql postgresql

我有两个表,由Django ORM生成:core_instauser 和 core_instauser_followers。请参阅下面的CREATE TABLE语句。

下面这个查询获取几个帐户的关注者并按某一列(counts_followed_by)排序,耗时超过30秒:

# SELECT
#   T3."id"
# FROM "core_instauser_followers"
#   INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE "core_instauser_followers"."from_instauser_id" IN (14275, 30533081)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
   id   
--------
  23358
  17461
  34360
  34201
  30624
  12475
 306799
  19215
  21042
  27073
(10 rows)

Time: 32850.160 ms

但是如果添加一些不会改变结果的条件,查询只需0.3秒——快了约100倍:

# SELECT
#   T3."id"
# FROM "core_instauser_followers"
#   INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE ("core_instauser_followers"."from_instauser_id" IN (14275, 30533081) AND T3."count_media" > 0 AND
#        T3."counts_follows" > -1 AND T3."counts_followed_by" > -1)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;
   id   
--------
  23358
  17461
  34360
  34201
  30624
  12475
 306799
  19215
  21042
  27073
(10 rows)

Time: 295.934 ms

表中的所有列都有索引。

为什么会这样?

UPD

用于创建表的SQL:

-- core_instauser

-- Account table; joined under the alias T3 in the queries of this post.
CREATE TABLE core_instauser (
    -- Primary key (constraint core_instauser_pkey); default comes from core_instauser_id_seq.
    id integer NOT NULL,
    -- Covered by the UNIQUE constraint core_instauser_uid_key.
    uid character varying(100) NOT NULL,
    username character varying(100) NOT NULL,
    full_name character varying(100) NOT NULL,
    profile_picture character varying(255) NOT NULL,
    -- The three counters below are declared without NOT NULL, so they may hold NULL.
    -- Each has its own single-column btree index; counts_followed_by is the
    -- ORDER BY column of the queries in this post.
    counts_followed_by integer,
    counts_follows integer,
    count_media integer,
    owner_id integer,
    hidden boolean NOT NULL,
    loaded boolean NOT NULL,
    update_time timestamp with time zone,
    has_avatar boolean,
    follow_rate double precision,
    deleted boolean NOT NULL,
    bio text NOT NULL,
    count_loaded_followers integer NOT NULL,
    has_bio boolean,
    has_full_name boolean,
    has_website boolean,
    website text NOT NULL
);

-- Sequence that supplies default values for core_instauser.id.
CREATE SEQUENCE core_instauser_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

-- Wire the sequence in as the column default for id.
ALTER TABLE ONLY core_instauser ALTER COLUMN id SET DEFAULT nextval('core_instauser_id_seq'::regclass);

ALTER TABLE ONLY core_instauser
    ADD CONSTRAINT core_instauser_pkey PRIMARY KEY (id);

ALTER TABLE ONLY core_instauser
    ADD CONSTRAINT core_instauser_uid_key UNIQUE (uid);

-- Single-column btree indexes on the three counter columns. Despite the "_uniq"
-- suffix in their names they are created without UNIQUE, i.e. as plain indexes.
-- The counts_followed_by index is the one both EXPLAIN plans in this post scan
-- backward to satisfy ORDER BY counts_followed_by DESC.
CREATE INDEX core_instauser_count_media_480f209b0ba2dbd4_uniq ON core_instauser USING btree (count_media);
CREATE INDEX core_instauser_counts_followed_by_33a853f6d98098dc_uniq ON core_instauser USING btree (counts_followed_by);
CREATE INDEX core_instauser_counts_follows_66136283704427b2_uniq ON core_instauser USING btree (counts_follows);

-- core_instauser_followers

-- Link table between rows of core_instauser: both *_instauser_id columns carry
-- foreign keys to core_instauser(id), and the (from_instauser_id, to_instauser_id)
-- pair is constrained to be unique.
CREATE TABLE core_instauser_followers (
    id integer NOT NULL,
    -- Filtered with IN (...) in the queries of this post.
    from_instauser_id integer NOT NULL,
    -- Join column: matched against core_instauser.id in the queries of this post.
    to_instauser_id integer NOT NULL
);

-- Sequence that supplies default values for core_instauser_followers.id.
CREATE SEQUENCE core_instauser_followers_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

-- Wire the sequence in as the column default for id.
ALTER TABLE ONLY core_instauser_followers ALTER COLUMN id SET DEFAULT nextval('core_instauser_followers_id_seq'::regclass);

-- Each (from, to) pair may appear only once. The index backing this constraint
-- is the one used by the Index Only Scan in both EXPLAIN plans in this post.
ALTER TABLE ONLY core_instauser_followers
    ADD CONSTRAINT core_instauser_followers_from_instauser_id_to_instauser_id_key UNIQUE (from_instauser_id, to_instauser_id);

ALTER TABLE ONLY core_instauser_followers
    ADD CONSTRAINT core_instauser_followers_pkey PRIMARY KEY (id);

-- Single-column btree indexes on each side of the link.
-- NOTE(review): from_instauser_id is also the leading column of the unique
-- constraint above, so the first index may be redundant -- confirm before dropping.
CREATE INDEX core_instauser_followers_f865d5f5 ON core_instauser_followers USING btree (from_instauser_id);
CREATE INDEX core_instauser_followers_f9b32b2c ON core_instauser_followers USING btree (to_instauser_id);

-- Both link columns reference core_instauser(id). The foreign keys are declared
-- DEFERRABLE INITIALLY DEFERRED, so by default they are checked at the end of
-- the transaction rather than after each statement.
ALTER TABLE ONLY core_instauser_followers
    ADD CONSTRAINT core_in_from_instauser_id_2ac1cc9fc9c44a79_fk_core_instauser_id FOREIGN KEY (from_instauser_id) REFERENCES core_instauser(id) DEFERRABLE INITIALLY DEFERRED;

ALTER TABLE ONLY core_instauser_followers
    ADD CONSTRAINT core_inst_to_instauser_id_4236828dfe87cfb8_fk_core_instauser_id FOREIGN KEY (to_instauser_id) REFERENCES core_instauser(id) DEFERRABLE INITIALLY DEFERRED;

解释查询:

没有附加条件的第一个慢查询

# EXPLAIN ANALYZE SELECT
#   T3."id"
# FROM "core_instauser_followers"
#   INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE "core_instauser_followers"."from_instauser_id" IN (14275, 30533081)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;

 Limit  (cost=1.13..32396.65 rows=10 width=8) (actual time=37561.457..37683.384 rows=10 loops=1)
   ->  Nested Loop  (cost=1.13..48956112.71 rows=15112 width=8) (actual time=37561.455..37683.369 rows=10 loops=1)
         ->  Index Scan Backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3  (cost=0.56..4942183.77 rows=31451512 width=8) (actual time=0.066..4153.129 rows=4492685 loops=1)
         ->  Index Only Scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers  (cost=0.57..1.39 rows=1 width=4) (actual time=0.006..0.006 rows=0 loops=4492685)
               Index Cond: ((from_instauser_id = ANY ('{14275,30533081}'::integer[])) AND (to_instauser_id = t3.id))
               Heap Fetches: 10
 Total runtime: 37683.475 ms
(7 rows)

使用附加条件进行快速查询

# EXPLAIN ANALYZE SELECT
#   T3."id"
# FROM "core_instauser_followers"
#   INNER JOIN "core_instauser" T3 ON ("core_instauser_followers"."to_instauser_id" = T3."id")
# WHERE ("core_instauser_followers"."from_instauser_id" IN (14275, 30533081) AND T3."count_media" > -1 AND
#        T3."counts_follows" > -1 AND T3."counts_followed_by" > -1)
# ORDER BY T3."counts_followed_by" DESC
# LIMIT 10;

 Limit  (cost=1.13..36969.96 rows=10 width=8) (actual time=24.635..222.119 rows=10 loops=1)
   ->  Nested Loop  (cost=1.13..35453106.76 rows=9590 width=8) (actual time=24.633..222.100 rows=10 loops=1)
         ->  Index Scan Backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3  (cost=0.56..5029740.19 rows=19958436 width=8) (actual time=0.037..60.866 rows=13387 loops=1)
               Index Cond: (counts_followed_by > (-1))
               Filter: ((count_media > (-1)) AND (counts_follows > (-1)))
         ->  Index Only Scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers  (cost=0.57..1.51 rows=1 width=4) (actual time=0.009..0.009 rows=0 loops=13387)
               Index Cond: ((from_instauser_id = ANY ('{14275,30533081}'::integer[])) AND (to_instauser_id = t3.id))
               Heap Fetches: 10
 Total runtime: 222.208 ms
(9 rows)

UPD 2

第二个查询中所有被过滤的列(count_media、counts_follows、counts_followed_by)的值都大于或等于0,因此新增的条件不会影响最终结果:

# SELECT count(*)
  FROM core_instauser
  WHERE counts_followed_by < 0 OR count_media < 0 OR counts_follows < 0;
 count 
-------
     0
(1 row)

Time: 5.551 ms

1 个答案:

答案 0 :(得分:1)

当您添加谓词时,您是在告诉优化器:您只对core_instauser中的特定行感兴趣。

旧查询必须匹配core_instauser中的每一行。对于扫描到的每一行(rows=4492685),它都必须到另一张表中检查是否存在匹配。

新查询把对core_instauser表的扫描限制为只取13387行(rows=13387),因此只需在另一张表中查找13387次。

添加谓词之后,得到的行数要么与原表的行数相同,要么比它少。

现在来看您的问题:为什么不同的查询会得到相同的结果。在您的情况下,恰好只有core_instauser表中的那些行与core_instauser_followers匹配。换一组数据,结果就可能不同。

我们可以用两袋带编号的球来打个比方。一个袋子里有1、2和3,另一个袋子里有1和2。

现在你对两个袋子做连接(join),找出编号相匹配的球。

select * from bag1 join bag2 using (number) 会连接出两行:1和2。

现在你限制bag1以获得所有小于3的球:

select * from bag1 join bag2 using (number) where bag1.number < 3
结果不会改变。