我需要通过子字符串匹配计算trade_name
表中grls_data
列与data ->> 'product_info'
表中fsa_raw
之间的交集。
表中的值计数:
grls
:38596 grls_data
:47434 fsa_raw
:651380 我有以下SQL架构:
CREATE TABLE public.grls
(
id integer NOT NULL DEFAULT nextval('grls_id_seq'::regclass),
use_version integer,
CONSTRAINT grls_pkey PRIMARY KEY (id),
CONSTRAINT grls_use_version_foreign FOREIGN KEY (use_version)
REFERENCES public.grls_data (id) MATCH SIMPLE
ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
OIDS=FALSE
);
CREATE INDEX grls_use_version_index
ON public.grls
USING btree
(use_version);
CREATE TABLE public.grls_data
(
id integer NOT NULL DEFAULT nextval('grls_data_id_seq'::regclass),
grls_id integer NOT NULL,
trade_name text NOT NULL,
// other columns
CONSTRAINT grls_data_pkey PRIMARY KEY (id),
CONSTRAINT grls_data_grls_id_foreign FOREIGN KEY (grls_id)
REFERENCES public.grls (id) MATCH SIMPLE
ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
OIDS=FALSE
);
CREATE INDEX grls_data_grls_id_index
ON public.grls_data
USING btree
(grls_id);
CREATE INDEX grls_data_trade_name_index
ON public.grls_data
USING hash
(trade_name COLLATE pg_catalog."default");
CREATE TABLE public.fsa_raw
(
id integer NOT NULL DEFAULT nextval('fsa_raw_id_seq'::regclass),
data jsonb,
CONSTRAINT fsa_raw_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE INDEX fsa_raw_data_idx
ON public.fsa_raw
USING gin
(data jsonb_path_ops);
CREATE INDEX words_trgm_gin
ON public.fsa_raw
USING gin
((data ->> 'product_info'::text) COLLATE pg_catalog."default" gin_trgm_ops);
所以,我为测试编写了以下SQL查询:
WITH tns AS (
SELECT DISTINCT ('%' || trade_name || '%') AS tn FROM grls JOIN grls_data ON
grls.use_version = grls_data.id
)
SELECT COUNT(*) FROM fsa_raw
WHERE (data ->> 'product_info') ILIKE ANY (SELECT tn FROM tns)
但我的查询费用非常高:
汇总(成本= 169494199.21..169494199.22行= 1宽度= 8)
我无法等待数周,数月才能获得查询结果。 如何优化我的SQL查询以更快地获得结果?