我正在对两个表做 JOIN,查询如下:
-- Test query: counts the rows produced by left-joining a 800-row sample of
-- lines_stats_process to lines on the nine matching columns.
-- id_data_source_connection is declared NOT NULL in both tables, so it is
-- compared with "="; the other eight columns are nullable and are compared
-- null-safely with IS NOT DISTINCT FROM.
SELECT count(*)
FROM (
    -- ORDER BY on the primary key makes the sample identical on every run;
    -- LIMIT without ORDER BY may return a different subset each time, which
    -- makes timings from separate runs incomparable.
    -- Only the columns used by the join are selected.
    SELECT
        id_data_source_connection,
        id_sys_country,
        advertiser_name,
        size_name,
        dsp,
        id_advertiser,
        id_placement,
        id_site,
        id_campaign
    FROM lines_stats_process
    ORDER BY id
    LIMIT 800
) AS lsp
LEFT JOIN lines AS l
    ON  lsp.id_data_source_connection = l.id_data_source_connection
    AND lsp.id_sys_country  IS NOT DISTINCT FROM l.id_sys_country
    AND lsp.advertiser_name IS NOT DISTINCT FROM l.advertiser_name
    AND lsp.size_name       IS NOT DISTINCT FROM l.size_name
    AND lsp.dsp             IS NOT DISTINCT FROM l.dsp
    AND lsp.id_advertiser   IS NOT DISTINCT FROM l.id_advertiser
    AND lsp.id_placement    IS NOT DISTINCT FROM l.id_placement
    AND lsp.id_site         IS NOT DISTINCT FROM l.id_site
    AND lsp.id_campaign     IS NOT DISTINCT FROM l.id_campaign;
注意:这里的 count(*) 和 LIMIT 800 仅用于测试。去掉 LIMIT 后,查询永远执行不完。
我的DDL:
-- Table public.lines.
-- Surrogate key "id" is filled from sequence lines_seq and must be positive.
-- Apart from id, id_account, id_data_source and id_data_source_connection,
-- every column is nullable.
CREATE TABLE public.lines (
    id                        int          NOT NULL DEFAULT nextval('lines_seq'::regclass),
    id_account                int          NOT NULL,
    id_data_source            int          NOT NULL,
    id_data_source_connection int          NOT NULL,
    id_client                 int,
    id_advertiser             bigint,
    advertiser_name           varchar(200) DEFAULT NULL,
    id_campaign               bigint,
    campaign_name             varchar(200) DEFAULT NULL,
    id_group                  bigint,
    group_name                varchar(200) DEFAULT NULL,
    id_ad                     bigint,
    ad_name                   varchar(200) DEFAULT NULL,
    id_site                   bigint,
    site_name                 varchar(200) DEFAULT NULL,
    id_placement              bigint,
    placement_name            varchar(200) DEFAULT NULL,
    id_format                 bigint,
    format_name               varchar(200) DEFAULT NULL,
    id_size                   bigint,
    size_name                 varchar(200) DEFAULT NULL,
    -- Both timestamps default to the current date cast to timestamp(0).
    created_at                timestamp(0) DEFAULT ('now'::text)::date,
    updated_at                timestamp(0) DEFAULT ('now'::text)::date,
    id_ds_advertiser          int,
    id_ds_campaign            int,
    id_ds_group               int,
    id_ds_ad                  int,
    id_ds_site                int,
    id_ds_placement           int,
    id_ds_format              int,
    id_publisher              int,
    id_country                int,
    dsp                       varchar(45)  DEFAULT NULL,
    country_name              varchar(200) DEFAULT NULL,
    id_ds_country             int,
    id_sys_country            int,
    CONSTRAINT lines_pkey     PRIMARY KEY (id),
    CONSTRAINT lines_id_check CHECK (id > 0)
)
WITH (OIDS=FALSE);

-- Ownership and privileges.
ALTER TABLE public.lines OWNER TO postgres;
GRANT ALL ON TABLE public.lines TO postgres;
-- Secondary btree indexes on public.lines.

-- id_data_source_connection, single column.
-- NOTE(review): the three indexes below are defined on exactly the same
-- column; two of them look redundant and are candidates for removal.
CREATE INDEX data_source_connection_lines    ON public.lines USING btree (id_data_source_connection);
CREATE INDEX id_data_source_connection211    ON public.lines USING btree (id_data_source_connection);
CREATE INDEX lines_id_data_source_connection ON public.lines USING btree (id_data_source_connection);

-- Other single-column indexes.
CREATE INDEX lines_advertiser_name ON public.lines USING btree (advertiser_name COLLATE pg_catalog."default");
CREATE INDEX lines_countries       ON public.lines USING btree (id_country);
CREATE INDEX lines_dsp             ON public.lines USING btree (dsp COLLATE pg_catalog."default");
CREATE INDEX lines_id_advertiser   ON public.lines USING btree (id_advertiser);
CREATE INDEX lines_id_campaign     ON public.lines USING btree (id_campaign);
CREATE INDEX lines_id_placement    ON public.lines USING btree (id_placement);
CREATE INDEX lines_id_site         ON public.lines USING btree (id_site);
CREATE INDEX lines_size_name       ON public.lines USING btree (size_name COLLATE pg_catalog."default");

-- Composite index over the nine columns, ending with id_sys_country.
CREATE INDEX lines_lsp
    ON public.lines
    USING btree (
        id_data_source_connection,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign,
        id_sys_country
    );

-- Composite index with a different column order.
-- NOTE(review): this one contains id_country, not id_sys_country - confirm
-- that is intended.
CREATE INDEX lines_lsp2
    ON public.lines
    USING btree (
        id_data_source_connection,
        id_country,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign
    );
-- Table public.lines_stats_process.
-- Surrogate key "id" is filled from sequence lines_stats_process_seq and
-- must be positive. Only id, id_account, id_data_source,
-- id_data_source_connection and date are NOT NULL; every other column is
-- nullable. Pages are filled to 70% (FILLFACTOR=70).
CREATE TABLE public.lines_stats_process (
    id                        int           NOT NULL DEFAULT nextval('lines_stats_process_seq'::regclass),
    id_line                   int,
    id_line_stat              int,
    id_account                int           NOT NULL,
    id_data_source            int           NOT NULL,
    id_data_source_connection int           NOT NULL,
    id_client                 int,
    id_advertiser             bigint,
    advertiser_name           varchar(200)  DEFAULT NULL,
    id_campaign               bigint,
    campaign_name             varchar(200)  DEFAULT NULL,
    id_group                  bigint,
    group_name                varchar(200)  DEFAULT NULL,
    id_ad                     bigint,
    ad_name                   varchar(200)  DEFAULT NULL,
    id_site                   bigint,
    site_name                 varchar(200)  DEFAULT NULL,
    id_placement              bigint,
    placement_name            varchar(200)  DEFAULT NULL,
    id_format                 bigint,
    format_name               varchar(200)  DEFAULT NULL,
    id_size                   bigint,
    size_name                 varchar(200)  DEFAULT NULL,
    -- Both timestamps default to the current date cast to timestamp(0).
    created_at                timestamp(0)  DEFAULT ('now'::text)::date,
    updated_at                timestamp(0)  DEFAULT ('now'::text)::date,
    id_ds_advertiser          int,
    id_ds_campaign            int,
    id_ds_group               int,
    id_ds_ad                  int,
    id_ds_site                int,
    id_ds_placement           int,
    id_ds_format              int,
    id_publisher              int,
    id_country                int,
    dsp                       varchar(45)   DEFAULT NULL,
    date                      date          NOT NULL,
    country_name              varchar(50)   DEFAULT NULL,
    id_sys_country            int,
    id_ds_country             int,
    budget                    numeric(10,2) DEFAULT NULL,
    bid                       numeric(10,2) DEFAULT NULL,
    CONSTRAINT lines_stats_process_pkey     PRIMARY KEY (id),
    CONSTRAINT lines_stats_process_id_check CHECK (id > 0)
)
WITH (FILLFACTOR=70, OIDS=FALSE);

-- Ownership and privileges.
ALTER TABLE public.lines_stats_process OWNER TO postgres;
GRANT ALL ON TABLE public.lines_stats_process TO postgres;
-- Secondary btree indexes on public.lines_stats_process.

-- id_data_source_connection, single column.
-- NOTE(review): the three indexes below are defined on exactly the same
-- column; two of them look redundant and are candidates for removal.
CREATE INDEX data_source_connection_lsp                    ON public.lines_stats_process USING btree (id_data_source_connection);
CREATE INDEX id_data_source_connection21                   ON public.lines_stats_process USING btree (id_data_source_connection);
CREATE INDEX lines_stats_process_id_data_source_connection ON public.lines_stats_process USING btree (id_data_source_connection);

-- Other single-column indexes.
CREATE INDEX lines_stats_process_advertiser_name ON public.lines_stats_process USING btree (advertiser_name COLLATE pg_catalog."default");
CREATE INDEX lines_stats_process_countries       ON public.lines_stats_process USING btree (id_sys_country);
CREATE INDEX lines_stats_process_dsp             ON public.lines_stats_process USING btree (dsp COLLATE pg_catalog."default");
CREATE INDEX lines_stats_process_id_advertiser   ON public.lines_stats_process USING btree (id_advertiser);
CREATE INDEX lines_stats_process_id_campaign     ON public.lines_stats_process USING btree (id_campaign);
CREATE INDEX lines_stats_process_id_placement    ON public.lines_stats_process USING btree (id_placement);
CREATE INDEX lines_stats_process_id_site         ON public.lines_stats_process USING btree (id_site);
CREATE INDEX lines_stats_process_size_name       ON public.lines_stats_process USING btree (size_name COLLATE pg_catalog."default");

-- Composite index over the nine columns, ending with id_sys_country.
CREATE INDEX lines_stats_process_lsp
    ON public.lines_stats_process
    USING btree (
        id_data_source_connection,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign,
        id_sys_country
    );

-- Same nine columns with id_sys_country moved to second position.
CREATE INDEX lines_stats_process_lsp2
    ON public.lines_stats_process
    USING btree (
        id_data_source_connection,
        id_sys_country,
        advertiser_name COLLATE pg_catalog."default",
        size_name COLLATE pg_catalog."default",
        dsp COLLATE pg_catalog."default",
        id_advertiser,
        id_placement,
        id_site,
        id_campaign
    );
lines_stats_process 有 6 万行。我把它限制为 800 行,以便执行时间可以接受。
我做了几次测试,分别关闭了以下选项:
-- Session-level planner settings switched off for the experiment:
-- nested-loop joins, materialization and sequential scans.
SET enable_nestloop TO off;
SET enable_material TO off;
SET enable_seqscan TO off;
但性能始终没有改善。
唯一有效的更改是把 IS NOT DISTINCT FROM 替换为 =。替换后,执行只需不到一秒钟。
当然,= 运算符不符合我的逻辑:我需要进行空安全(null-safe)比较,所以必须使用 IS NOT DISTINCT FROM。
把所有条件都替换之后,我开始逐一把 IS NOT DISTINCT FROM 放回去。当我放回最后一个 IS NOT DISTINCT FROM 时,查询执行时间又变得太长。
我把 = 换到其他任意一个条件上,效果都一样:8 个 IS NOT DISTINCT FROM 时耗时太长;只有 7 个 IS NOT DISTINCT FROM 时,执行时间不到一秒。
对不起我的英语,不是我的母语。