PostgreSQL 在 LEFT JOIN 中使用 IS NOT DISTINCT FROM 时性能不佳

时间:2016-11-27 16:07:56

标签: postgresql

我正在处理JOIN两个表,如下所示:

 -- Test query: count the rows produced by LEFT JOINing an 800-row sample of
 -- lines_stats_process (lsp) to lines (l). id_data_source_connection is
 -- matched with "="; the other eight columns are matched with the null-safe
 -- IS NOT DISTINCT FROM.
 SELECT count(*)
 FROM (
     SELECT *
     FROM lines_stats_process
     LIMIT 800  -- no ORDER BY, so which 800 rows are sampled is unspecified
 ) AS lsp
 LEFT JOIN lines AS l
     ON  lsp.id_data_source_connection = l.id_data_source_connection
     AND lsp.id_sys_country  IS NOT DISTINCT FROM l.id_sys_country
     AND lsp.advertiser_name IS NOT DISTINCT FROM l.advertiser_name
     AND lsp.size_name       IS NOT DISTINCT FROM l.size_name
     AND lsp.dsp             IS NOT DISTINCT FROM l.dsp
     AND lsp.id_advertiser   IS NOT DISTINCT FROM l.id_advertiser
     AND lsp.id_placement    IS NOT DISTINCT FROM l.id_placement
     AND lsp.id_site         IS NOT DISTINCT FROM l.id_site
     AND lsp.id_campaign     IS NOT DISTINCT FROM l.id_campaign;

注意:我使用 count(*) 和 LIMIT 800 仅用于测试目的。如果不加 LIMIT,查询永远执行不完。

我的DDL:

    -- Table public.lines: the right-hand side of the LEFT JOIN in the test query.
    -- Only id, id_account, id_data_source and id_data_source_connection are
    -- NOT NULL; every other column is nullable.
    CREATE TABLE public.lines (
        id                        int          NOT NULL DEFAULT nextval('lines_seq'::regclass),
        id_account                int          NOT NULL,
        id_data_source            int          NOT NULL,
        id_data_source_connection int          NOT NULL,
        id_client                 int,
        id_advertiser             bigint,
        advertiser_name           varchar(200) DEFAULT NULL::varchar,
        id_campaign               bigint,
        campaign_name             varchar(200) DEFAULT NULL::varchar,
        id_group                  bigint,
        group_name                varchar(200) DEFAULT NULL::varchar,
        id_ad                     bigint,
        ad_name                   varchar(200) DEFAULT NULL::varchar,
        id_site                   bigint,
        site_name                 varchar(200) DEFAULT NULL::varchar,
        id_placement              bigint,
        placement_name            varchar(200) DEFAULT NULL::varchar,
        id_format                 bigint,
        format_name               varchar(200) DEFAULT NULL::varchar,
        id_size                   bigint,
        size_name                 varchar(200) DEFAULT NULL::varchar,
        -- The default expression is cast to date, so the default value carries
        -- no time-of-day component.
        created_at                timestamp(0) DEFAULT ('now'::text)::date,
        updated_at                timestamp(0) DEFAULT ('now'::text)::date,
        id_ds_advertiser          int,
        id_ds_campaign            int,
        id_ds_group               int,
        id_ds_ad                  int,
        id_ds_site                int,
        id_ds_placement           int,
        id_ds_format              int,
        id_publisher              int,
        id_country                int,
        dsp                       varchar(45)  DEFAULT NULL::varchar,
        country_name              varchar(200) DEFAULT NULL::varchar,
        id_ds_country             int,
        id_sys_country            int,
        CONSTRAINT lines_pkey     PRIMARY KEY (id),
        CONSTRAINT lines_id_check CHECK (id > 0)
    )
    WITH (OIDS = FALSE);

    ALTER TABLE public.lines OWNER TO postgres;
    GRANT ALL ON TABLE public.lines TO postgres;

    -- Index: public.data_source_connection_lines

    -- DROP INDEX public.data_source_connection_lines;

    -- Single-column B-tree index on lines.id_data_source_connection.
    -- NOTE(review): id_data_source_connection211 and lines_id_data_source_connection
    -- are defined on the same table, method and column -- looks redundant; confirm.
    CREATE INDEX data_source_connection_lines ON public.lines USING btree (id_data_source_connection);

    -- Index: public.id_data_source_connection211

    -- DROP INDEX public.id_data_source_connection211;

    -- Single-column B-tree index on lines.id_data_source_connection.
    -- NOTE(review): data_source_connection_lines and lines_id_data_source_connection
    -- are defined on the same table, method and column -- looks redundant; confirm.
    CREATE INDEX id_data_source_connection211 ON public.lines USING btree (id_data_source_connection);

    -- Index: public.lines_advertiser_name

    -- DROP INDEX public.lines_advertiser_name;

    -- Single-column B-tree index on lines.advertiser_name.
    CREATE INDEX lines_advertiser_name ON public.lines USING btree (advertiser_name COLLATE pg_catalog."default");

    -- Index: public.lines_countries

    -- DROP INDEX public.lines_countries;

    -- Single-column B-tree index on lines.id_country.
    -- NOTE(review): the test query joins on id_sys_country, and the sibling index
    -- lines_stats_process_countries is on id_sys_country -- confirm id_country is intended here.
    CREATE INDEX lines_countries ON public.lines USING btree (id_country);

    -- Index: public.lines_dsp

    -- DROP INDEX public.lines_dsp;

    -- Single-column B-tree index on lines.dsp.
    CREATE INDEX lines_dsp ON public.lines USING btree (dsp COLLATE pg_catalog."default");

    -- Index: public.lines_id_advertiser

    -- DROP INDEX public.lines_id_advertiser;

    -- Single-column B-tree index on lines.id_advertiser.
    CREATE INDEX lines_id_advertiser ON public.lines USING btree (id_advertiser);

    -- Index: public.lines_id_campaign

    -- DROP INDEX public.lines_id_campaign;

    -- Single-column B-tree index on lines.id_campaign.
    CREATE INDEX lines_id_campaign ON public.lines USING btree (id_campaign);

    -- Index: public.lines_id_data_source_connection

    -- DROP INDEX public.lines_id_data_source_connection;

    -- Single-column B-tree index on lines.id_data_source_connection.
    -- NOTE(review): data_source_connection_lines and id_data_source_connection211
    -- are defined on the same table, method and column -- looks redundant; confirm.
    CREATE INDEX lines_id_data_source_connection ON public.lines USING btree (id_data_source_connection);

    -- Index: public.lines_id_placement

    -- DROP INDEX public.lines_id_placement;

    -- Single-column B-tree index on lines.id_placement.
    CREATE INDEX lines_id_placement ON public.lines USING btree (id_placement);

    -- Index: public.lines_id_site

    -- DROP INDEX public.lines_id_site;

    -- Single-column B-tree index on lines.id_site.
    CREATE INDEX lines_id_site ON public.lines USING btree (id_site);

    -- Index: public.lines_lsp

    -- DROP INDEX public.lines_lsp;

    -- Composite B-tree index on lines over the nine columns that the test
    -- query's join condition compares.
    CREATE INDEX lines_lsp ON public.lines USING btree (
        id_data_source_connection,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign,
        id_sys_country
    );

    -- Index: public.lines_lsp2

    -- DROP INDEX public.lines_lsp2;

    -- Composite B-tree index on lines with the country column in second position.
    -- NOTE(review): this index uses id_country, whereas the test query joins on
    -- id_sys_country and lines_stats_process_lsp2 uses id_sys_country in the
    -- same position -- confirm which column is intended.
    CREATE INDEX lines_lsp2 ON public.lines USING btree (
        id_data_source_connection,
        id_country,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign
    );

    -- Index: public.lines_size_name

    -- DROP INDEX public.lines_size_name;

    -- Single-column B-tree index on lines.size_name.
    CREATE INDEX lines_size_name ON public.lines USING btree (size_name COLLATE pg_catalog."default");











-- Table: public.lines_stats_process

-- DROP TABLE public.lines_stats_process;

-- Table public.lines_stats_process: the left-hand side (alias lsp) of the
-- LEFT JOIN in the test query. Only id, id_account, id_data_source,
-- id_data_source_connection and date are NOT NULL; all other columns are nullable.
CREATE TABLE public.lines_stats_process (
    id                        int           NOT NULL DEFAULT nextval('lines_stats_process_seq'::regclass),
    id_line                   int,
    id_line_stat              int,
    id_account                int           NOT NULL,
    id_data_source            int           NOT NULL,
    id_data_source_connection int           NOT NULL,
    id_client                 int,
    id_advertiser             bigint,
    advertiser_name           varchar(200)  DEFAULT NULL::varchar,
    id_campaign               bigint,
    campaign_name             varchar(200)  DEFAULT NULL::varchar,
    id_group                  bigint,
    group_name                varchar(200)  DEFAULT NULL::varchar,
    id_ad                     bigint,
    ad_name                   varchar(200)  DEFAULT NULL::varchar,
    id_site                   bigint,
    site_name                 varchar(200)  DEFAULT NULL::varchar,
    id_placement              bigint,
    placement_name            varchar(200)  DEFAULT NULL::varchar,
    id_format                 bigint,
    format_name               varchar(200)  DEFAULT NULL::varchar,
    id_size                   bigint,
    size_name                 varchar(200)  DEFAULT NULL::varchar,
    -- The default expression is cast to date, so the default value carries
    -- no time-of-day component.
    created_at                timestamp(0)  DEFAULT ('now'::text)::date,
    updated_at                timestamp(0)  DEFAULT ('now'::text)::date,
    id_ds_advertiser          int,
    id_ds_campaign            int,
    id_ds_group               int,
    id_ds_ad                  int,
    id_ds_site                int,
    id_ds_placement           int,
    id_ds_format              int,
    id_publisher              int,
    id_country                int,
    dsp                       varchar(45)   DEFAULT NULL::varchar,
    date                      date          NOT NULL,
    country_name              varchar(50)   DEFAULT NULL::varchar,
    id_sys_country            int,
    id_ds_country             int,
    budget                    numeric(10,2) DEFAULT NULL::numeric,
    bid                       numeric(10,2) DEFAULT NULL::numeric,
    CONSTRAINT lines_stats_process_pkey     PRIMARY KEY (id),
    CONSTRAINT lines_stats_process_id_check CHECK (id > 0)
)
WITH (FILLFACTOR = 70, OIDS = FALSE);

ALTER TABLE public.lines_stats_process OWNER TO postgres;
GRANT ALL ON TABLE public.lines_stats_process TO postgres;

-- Index: public.data_source_connection_lsp

-- DROP INDEX public.data_source_connection_lsp;

-- Single-column B-tree index on lines_stats_process.id_data_source_connection.
-- NOTE(review): id_data_source_connection21 and
-- lines_stats_process_id_data_source_connection are defined on the same table,
-- method and column -- looks redundant; confirm.
CREATE INDEX data_source_connection_lsp ON public.lines_stats_process USING btree (id_data_source_connection);

-- Index: public.id_data_source_connection21

-- DROP INDEX public.id_data_source_connection21;

-- Single-column B-tree index on lines_stats_process.id_data_source_connection.
-- NOTE(review): data_source_connection_lsp and
-- lines_stats_process_id_data_source_connection are defined on the same table,
-- method and column -- looks redundant; confirm.
CREATE INDEX id_data_source_connection21 ON public.lines_stats_process USING btree (id_data_source_connection);

-- Index: public.lines_stats_process_advertiser_name

-- DROP INDEX public.lines_stats_process_advertiser_name;

-- Single-column B-tree index on lines_stats_process.advertiser_name.
CREATE INDEX lines_stats_process_advertiser_name ON public.lines_stats_process USING btree (advertiser_name COLLATE pg_catalog."default");

-- Index: public.lines_stats_process_countries

-- DROP INDEX public.lines_stats_process_countries;

-- Single-column B-tree index on lines_stats_process.id_sys_country.
CREATE INDEX lines_stats_process_countries ON public.lines_stats_process USING btree (id_sys_country);

-- Index: public.lines_stats_process_dsp

-- DROP INDEX public.lines_stats_process_dsp;

-- Single-column B-tree index on lines_stats_process.dsp.
CREATE INDEX lines_stats_process_dsp ON public.lines_stats_process USING btree (dsp COLLATE pg_catalog."default");

-- Index: public.lines_stats_process_id_advertiser

-- DROP INDEX public.lines_stats_process_id_advertiser;

-- Single-column B-tree index on lines_stats_process.id_advertiser.
CREATE INDEX lines_stats_process_id_advertiser ON public.lines_stats_process USING btree (id_advertiser);

-- Index: public.lines_stats_process_id_campaign

-- DROP INDEX public.lines_stats_process_id_campaign;

-- Single-column B-tree index on lines_stats_process.id_campaign.
CREATE INDEX lines_stats_process_id_campaign ON public.lines_stats_process USING btree (id_campaign);

-- Index: public.lines_stats_process_id_data_source_connection

-- DROP INDEX public.lines_stats_process_id_data_source_connection;

-- Single-column B-tree index on lines_stats_process.id_data_source_connection.
-- NOTE(review): data_source_connection_lsp and id_data_source_connection21 are
-- defined on the same table, method and column -- looks redundant; confirm.
CREATE INDEX lines_stats_process_id_data_source_connection ON public.lines_stats_process USING btree (id_data_source_connection);

-- Index: public.lines_stats_process_id_placement

-- DROP INDEX public.lines_stats_process_id_placement;

-- Single-column B-tree index on lines_stats_process.id_placement.
CREATE INDEX lines_stats_process_id_placement ON public.lines_stats_process USING btree (id_placement);

-- Index: public.lines_stats_process_id_site

-- DROP INDEX public.lines_stats_process_id_site;

-- Single-column B-tree index on lines_stats_process.id_site.
CREATE INDEX lines_stats_process_id_site ON public.lines_stats_process USING btree (id_site);

-- Index: public.lines_stats_process_lsp

-- DROP INDEX public.lines_stats_process_lsp;

-- Composite B-tree index on lines_stats_process over the nine columns that the
-- test query's join condition compares.
CREATE INDEX lines_stats_process_lsp ON public.lines_stats_process USING btree (
    id_data_source_connection,
    advertiser_name COLLATE pg_catalog."default",
    size_name COLLATE pg_catalog."default",
    dsp COLLATE pg_catalog."default",
    id_advertiser,
    id_placement,
    id_site,
    id_campaign,
    id_sys_country
);

-- Index: public.lines_stats_process_lsp2

-- DROP INDEX public.lines_stats_process_lsp2;

-- Composite B-tree index on lines_stats_process over the same nine join
-- columns as lines_stats_process_lsp, with id_sys_country moved to second position.
CREATE INDEX lines_stats_process_lsp2 ON public.lines_stats_process USING btree (
    id_data_source_connection,
    id_sys_country,
    advertiser_name COLLATE pg_catalog."default",
    size_name COLLATE pg_catalog."default",
    dsp COLLATE pg_catalog."default",
    id_advertiser,
    id_placement,
    id_site,
    id_campaign
);

-- Index: public.lines_stats_process_size_name

-- DROP INDEX public.lines_stats_process_size_name;

-- Single-column B-tree index on lines_stats_process.size_name.
CREATE INDEX lines_stats_process_size_name ON public.lines_stats_process USING btree (size_name COLLATE pg_catalog."default");

这是EXPLAIN ANALYZE

lines_stats_process有60K行。我将它限制在800以便有合理的执行时间。

我做了几次测试,我已禁用以下选项:

-- Planner options switched off while experimenting with the join:
-- nested-loop joins, materialization and sequential scans.
SET enable_nestloop TO off;
SET enable_material TO off;
SET enable_seqscan TO off;

但永远不会有更好的表现。

唯一有效的更改是把 IS NOT DISTINCT FROM 替换为 =。更改后,执行只需不到一秒钟。当然,= 运算符不符合我的逻辑,因为我需要进行空安全(NULL-safe)比较,所以我必须使用 IS NOT DISTINCT FROM。完成所有替换后,我开始逐一把 IS NOT DISTINCT FROM 放回去。当我放回最后一个 IS NOT DISTINCT FROM 时,查询执行时间又变得太长。

我把最后一个 IS NOT DISTINCT FROM 换到其他任意一个条件上,效果都一样:使用 8 个 IS NOT DISTINCT FROM 时耗时太长;使用 7 个 IS NOT DISTINCT FROM 时,执行时间不到一秒。

对不起我的英语,不是我的母语。

0 个答案:

没有答案