我有一个大查询,执行一次要花 10 多分钟,我想进一步优化它。我已经添加了几个索引,但对执行时间没有太大影响。
下面是查询计划,希望能得到一些关于如何提高执行速度的建议。
任何指导都会有很大帮助。
PostgreSQL版本11
"Hash Left Join (cost=564692523.83..564692533.09 rows=200 width=1032)"
" Hash Cond: (fo.customer_email = loc.customer_email)"
" CTE base"
" -> Unique (cost=2818612.53..2915284.49 rows=798528 width=309)"
" -> Sort (cost=2818612.53..2823006.71 rows=1757672 width=309)"
" Sort Key: (first_value(((cube.customer_first_name || ' '::text) || cube.customer_last_name)) OVER (?)), cube.customer_email, (first_value(cube.customer_first_name) OVER (?)), (first_value(cube.customer_last_name) OVER (?)), cube.store_id, (first_value(cube.customer_group_name) OVER (?)), cube.order_id, cube.cat_level_1_name, cube.cat_level_2_name, cube.cat_level_3_name, cube.item_sku, cube.product_name_sku, cube.shipping_country_id, cube.default_channel_grouping, cube.first_order_of_customer, cube.created_at_order, cube.order_subtotal_net_after_discount, cube.row_number, cube.customer_city, cube.order_number, (row_number() OVER (?))"
" -> WindowAgg (cost=2348817.00..2383970.44 rows=1757672 width=309)"
" -> Sort (cost=2348817.00..2353211.18 rows=1757672 width=323)"
" Sort Key: cube.customer_email, cube.created_at_order DESC"
" -> WindowAgg (cost=1862615.29..1902162.91 rows=1757672 width=323)"
" -> Sort (cost=1862615.29..1867009.47 rows=1757672 width=291)"
" Sort Key: cube.customer_email, ((cube.customer_group_name IS NULL)), cube.created_at_order DESC NULLS LAST"
" -> WindowAgg (cost=1400441.09..1439988.71 rows=1757672 width=291)"
" -> Sort (cost=1400441.09..1404835.27 rows=1757672 width=259)"
" Sort Key: cube.customer_email, ((cube.customer_last_name IS NULL)), cube.created_at_order DESC NULLS LAST"
" -> WindowAgg (cost=962297.88..1001845.50 rows=1757672 width=259)"
" -> Sort (cost=962297.88..966692.06 rows=1757672 width=227)"
" Sort Key: cube.customer_email, ((cube.customer_first_name IS NULL)), cube.created_at_order DESC NULLS LAST"
" -> WindowAgg (cost=539397.31..587733.29 rows=1757672 width=227)"
" -> Sort (cost=539397.31..543791.49 rows=1757672 width=195)"
" Sort Key: cube.customer_email, (((cube.customer_first_name IS NULL) AND (cube.customer_last_name IS NULL))), cube.created_at_order DESC NULLS LAST"
" -> Seq Scan on cube (cost=0.00..188863.72 rows=1757672 width=195)"
" CTE last_order_top_value"
" -> Unique (cost=705414.33..705431.08 rows=200 width=200)"
" CTE last_order"
" -> WindowAgg (cost=104658.07..129785.32 rows=670060 width=49)"
" -> WindowAgg (cost=104658.07..118059.27 rows=670060 width=41)"
" -> Sort (cost=104658.07..106333.22 rows=670060 width=37)"
" Sort Key: orders.customer_email, orders.created_at_order"
" -> Seq Scan on orders (cost=0.00..39816.60 rows=670060 width=37)"
" CTE cat_url"
" -> Gather (cost=1000.00..59878.16 rows=8269 width=64)"
" Workers Planned: 4"
" -> Parallel Seq Scan on core_url_rewrite (cost=0.00..58051.26 rows=2067 width=64)"
" Filter: ((id_path)::text ~~ '%category%'::text)"
" CTE prod_url"
" -> Gather (cost=1000.00..62692.26 rows=36410 width=64)"
" Workers Planned: 4"
" -> Parallel Seq Scan on core_url_rewrite core_url_rewrite_1 (cost=0.00..58051.26 rows=9102 width=64)"
" Filter: ((category_id IS NULL) AND ((id_path)::text ~~ '%product%'::text))"
" CTE base"
" -> WindowAgg (cost=424384.90..437786.10 rows=670060 width=227)"
" -> Sort (cost=424384.90..426060.05 rows=670060 width=199)"
" Sort Key: lo.customer_email"
" -> Hash Left Join (cost=128458.52..295416.42 rows=670060 width=199)"
" Hash Cond: ((i.product_id = p.product_id) AND (i.store_id = p.store_id))"
" -> Hash Left Join (cost=79411.82..219567.33 rows=670060 width=58)"
" Hash Cond: (lo.order_id = i.order_id)"
" Join Filter: (lo.last_order = lo.order_id)"
" -> CTE Scan on last_order lo (cost=0.00..13401.20 rows=670060 width=44)"
" -> Hash (cost=46992.70..46992.70 rows=1765770 width=22)"
" -> Seq Scan on items i (cost=0.00..46992.70 rows=1765770 width=22)"
" -> Hash (cost=48246.90..48246.90 rows=53320 width=157)"
" -> Hash Right Join (cost=46408.20..48246.90 rows=53320 width=157)"
" Hash Cond: ((pu.product_id = p.product_id) AND (pu.store_id = p.store_id))"
" -> CTE Scan on prod_url pu (cost=0.00..728.20 rows=36410 width=48)"
" -> Hash (cost=45608.40..45608.40 rows=53320 width=125)"
" -> Merge Left Join (cost=44693.88..45608.40 rows=53320 width=125)"
" Merge Cond: ((p.store_id = cu.store_id) AND (((p.cat_level_2_id)::integer) = cu.category_id))"
" -> Sort (cost=43990.46..44123.76 rows=53320 width=97)"
" Sort Key: p.store_id, ((p.cat_level_2_id)::integer)"
" -> Seq Scan on products p (cost=0.00..39804.20 rows=53320 width=97)"
" -> Sort (cost=703.42..724.10 rows=8269 width=48)"
" Sort Key: cu.store_id, cu.category_id"
" -> CTE Scan on cat_url cu (cost=0.00..165.38 rows=8269 width=48)"
" -> Sort (cost=15272.49..15280.87 rows=3350 width=200)"
" Sort Key: base.customer_email"
" -> CTE Scan on base (cost=0.00..15076.35 rows=3350 width=200)"
" Filter: (max_amount = amount)"
" CTE repurchase"
" -> Hash Left Join (cost=2.64..20903.71 rows=231416 width=44)"
" Hash Cond: (lower((c.top_first_cat3)::text) = lower(art.top_first_cat3))"
" InitPlan 7 (returns $8)"
" -> Seq Scan on avg_repurchase_times (cost=0.00..1.25 rows=1 width=4)"
" Filter: (lower(top_first_cat3) = 'rest'::text)"
" -> Seq Scan on customers c (cost=0.00..14051.16 rows=231416 width=48)"
" -> Hash (cost=1.17..1.17 rows=17 width=14)"
" -> Seq Scan on avg_repurchase_times art (cost=0.00..1.17 rows=17 width=14)"
" CTE first_order_top_value_category"
" -> CTE Scan on cat_seg (cost=282871.58..286770.09 rows=866 width=104)"
" Filter: (hno = 1)"
" CTE cat_seg"
" -> Sort (cost=282438.41..282871.58 rows=173267 width=44)"
" Sort Key: cube_1.customer_email"
" -> HashAggregate (cost=265629.23..267361.90 rows=173267 width=44)"
" Group Key: cube_1.customer_email, cube_1.cat_level_1_name, cube_1.row_amount_order, row_number() OVER (?)"
" -> WindowAgg (cost=247363.16..259540.54 rows=608869 width=44)"
" -> Sort (cost=247363.16..248885.33 rows=608869 width=36)"
" Sort Key: cube_1.customer_email, cube_1.row_amount_order DESC"
" -> Seq Scan on cube cube_1 (cost=0.00..188863.72 rows=608869 width=36)"
" Filter: (first_order_of_customer IS TRUE)"
" CTE final_output"
" -> WindowAgg (cost=305171.04..560488690.38 rows=200 width=968)"
" -> GroupAggregate (cost=305171.04..560488672.38 rows=200 width=804)"
" Group Key: b.customer_email"
" -> Merge Left Join (cost=305171.04..60394894.75 rows=4000750169 width=717)"
" Merge Cond: (b.customer_email = r.customer_email)"
" -> Merge Left Join (cost=279923.40..349754.67 rows=3457626 width=705)"
" Merge Cond: (b.customer_email = fotvc.customer_email)"
" -> Merge Left Join (cost=279863.83..295834.39 rows=798528 width=673)"
" Merge Cond: (b.customer_email = lotv.customer_email)"
" -> Sort (cost=279852.19..281848.51 rows=798528 width=513)"
" Sort Key: b.customer_email"
" -> CTE Scan on base b (cost=0.00..15970.56 rows=798528 width=513)"
" -> Sort (cost=11.64..12.14 rows=200 width=192)"
" Sort Key: lotv.customer_email"
" -> CTE Scan on last_order_top_value lotv (cost=0.00..4.00 rows=200 width=192)"
" -> Sort (cost=59.57..61.74 rows=866 width=64)"
" Sort Key: fotvc.customer_email"
" -> CTE Scan on first_order_top_value_category fotvc (cost=0.00..17.32 rows=866 width=64)"
" -> Sort (cost=25247.63..25826.17 rows=231416 width=44)"
" Sort Key: r.customer_email"
" -> CTE Scan on repurchase r (cost=0.00..4628.32 rows=231416 width=44)"
" CTE last_order_category"
" -> GroupAggregate (cost=275444.02..275444.06 rows=1 width=117)"
" Group Key: c_1.customer_email"
" -> Sort (cost=275444.02..275444.03 rows=1 width=63)"
" Sort Key: c_1.customer_email"
" -> Hash Join (cost=235826.80..275444.01 rows=1 width=63)"
" Hash Cond: ((b_1.customer_email = c_1.customer_email) AND ((b_1.order_number)::text = (c_1.order_number)::text))"
" -> CTE Scan on base b_1 (cost=0.00..17966.88 rows=3993 width=72)"
" Filter: (last_no = 1)"
" -> Hash (cost=188863.72..188863.72 rows=1757672 width=65)"
" -> Seq Scan on cube c_1 (cost=0.00..188863.72 rows=1757672 width=65)"
" -> CTE Scan on final_output fo (cost=0.00..4.00 rows=200 width=936)"
" -> Hash (cost=0.02..0.02 rows=1 width=128)"
" -> CTE Scan on last_order_category loc (cost=0.00..0.02 rows=1 width=128)"
下面是查询本身,该查询通过 “CREATE TABLE segment AS” 语句创建表。
-- EXPLAIN
-- Builds the customer segmentation table: one output row per customer_email,
-- aggregating every ol.cube row of that customer and enriching it with
--   * the highest-value item of the customer's last order (last_order_top_value),
--   * repurchase / reactivation flags from ol.customers (repurchase),
--   * the category of the highest-value row of the first order
--     (first_order_top_value_category),
--   * the categories of the last order (last_order_category).
-- Target engine is PostgreSQL 11, where every CTE is materialized; keeping each
-- CTE as narrow and as small as possible therefore matters for run time.
CREATE TABLE segment AS
WITH base AS (
    -- One row per ol.cube row, decorated with per-customer "latest non-null"
    -- attributes. last_no numbers a customer's rows from newest to oldest.
    --
    -- No DISTINCT here: last_no is unique within customer_email and
    -- customer_email is selected, so every row is already distinct. A DISTINCT
    -- would only add a sort on all 21 columns without removing anything.
    --
    -- NOTE(review): the four first_value() windows use four different
    -- orderings, and last_no a fifth, so the table is sorted once per window.
    -- final_output only ever takes max() of these four values per customer;
    -- computing them in a separate per-customer CTE would likely be cheaper,
    -- but that is a larger rewrite and is not done here.
    SELECT first_value(customer_first_name || ' ' || customer_last_name)
               OVER (PARTITION BY customer_email
                     ORDER BY (customer_first_name IS NULL AND customer_last_name IS NULL),
                              created_at_order DESC NULLS LAST) AS name,
           customer_email,
           first_value(customer_first_name)
               OVER (PARTITION BY customer_email
                     ORDER BY customer_first_name IS NULL,
                              created_at_order DESC NULLS LAST) AS first_name,
           first_value(customer_last_name)
               OVER (PARTITION BY customer_email
                     ORDER BY customer_last_name IS NULL,
                              created_at_order DESC NULLS LAST) AS last_name,
           store_id,
           first_value(customer_group_name)
               OVER (PARTITION BY customer_email
                     ORDER BY customer_group_name IS NULL,
                              created_at_order DESC NULLS LAST) AS customer_group,
           order_id,
           cat_level_1_name,
           cat_level_2_name,
           cat_level_3_name,
           item_sku,
           product_name_sku,
           shipping_country_id,
           default_channel_grouping,
           first_order_of_customer,
           created_at_order,
           order_subtotal_net_after_discount,
           row_number,
           customer_city,
           order_number,
           row_number()
               OVER (PARTITION BY customer_email
                     ORDER BY created_at_order DESC) AS last_no
    FROM ol.cube
    -- Order-state filter left disabled, as in the original:
    -- WHERE order_state_1 != 'canceled' AND order_state_1 != 'pending_payment'
),
last_order_top_value AS (
    -- For each customer: one item of the last order whose discounted row
    -- amount equals the maximum of that order, with product/category URLs.
    WITH last_order AS (
        -- Every order tagged with the id of the customer's most recent order.
        SELECT customer_id,
               last_value(order_id)
                   OVER (PARTITION BY customer_email
                         ORDER BY created_at_order
                         ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_order,
               order_id,
               customer_email
        FROM ol.orders
    ),
    cat_url AS (
        SELECT category_id,
               store_id,
               request_path
        FROM dl_magento.core_url_rewrite
        WHERE id_path LIKE '%category%'
    ),
    prod_url AS (
        SELECT product_id,
               store_id,
               request_path
        FROM dl_magento.core_url_rewrite
        WHERE id_path LIKE '%product%'
          AND category_id IS NULL
    ),
    last_order_items AS (
        SELECT lo.customer_id,
               lo.order_id,
               lo.customer_email,
               i.item_id,
               p.product_id,
               i.row_amount_minus_discount_order AS amount,
               max(i.row_amount_minus_discount_order)
                   OVER (PARTITION BY lo.customer_email) AS max_amount,
               cat_level_2_name AS category,
               'http://xxxxxxx/media/catalog/product' || image AS image,
               name,
               cu.request_path AS category_request_path,
               pu.request_path AS product_request_path
        FROM last_order AS lo
        LEFT JOIN ol.items AS i
               ON lo.order_id = i.order_id
        LEFT JOIN ol.products AS p
               ON i.product_id = p.product_id
              AND i.store_id = p.store_id
        LEFT JOIN cat_url AS cu
               ON p.cat_level_2_id :: INT = cu.category_id
              AND p.store_id = cu.store_id
        LEFT JOIN prod_url AS pu
               ON p.product_id = pu.product_id
              AND p.store_id = pu.store_id
        -- Restrict to the last order *before* joining. As a LEFT JOIN ON
        -- condition this predicate kept every older order as an all-NULL row
        -- that the outer "max_amount = amount" filter discarded anyway.
        WHERE lo.last_order = lo.order_id
    )
    -- DISTINCT ON without ORDER BY: when several items tie for the maximum
    -- amount, which one is kept is arbitrary (unchanged from the original).
    SELECT DISTINCT ON (customer_email)
           customer_email,
           order_id,
           customer_id,
           name,
           image,
           category,
           category_request_path,
           product_request_path
    FROM last_order_items
    WHERE max_amount = amount
),
repurchase AS (
    -- Per-customer recency and the average repurchase time of the customer's
    -- top first category; categories without their own entry fall back to the
    -- 'rest' row of ol.avg_repurchase_times.
    SELECT c.customer_email,
           c.top_first_cat3,
           date_part('day', 'today' :: DATE - c.last_order_date) :: INT AS days_since_last_order,
           -- Explicit alias: final_output reads r.avg_repurchase_time.
           COALESCE(art.avg_repurchase_time,
                    (SELECT fallback.avg_repurchase_time
                     FROM ol.avg_repurchase_times AS fallback
                     WHERE lower(fallback.top_first_cat3) = 'rest')) AS avg_repurchase_time,
           CASE
               WHEN (now()::date - c.last_order_date::date) = 60
                    AND COALESCE(c.num_orders, 0) != 1 THEN 1
               ELSE 0
           END AS reactivation_delay
    FROM ol.customers AS c
    LEFT JOIN ol.avg_repurchase_times AS art
           ON lower(art.top_first_cat3) = lower(c.top_first_cat3)
),
first_order_top_value_category AS (
    -- Level-1 category of the first-order row with the highest
    -- row_amount_order, one row per customer. Ties are broken arbitrarily,
    -- exactly as the previous ROW_NUMBER() = 1 formulation did.
    SELECT DISTINCT ON (customer_email)
           customer_email,
           cat_level_1_name AS first_order_category
    FROM ol.cube
    WHERE first_order_of_customer IS TRUE
    ORDER BY customer_email, row_amount_order DESC
),
final_output AS (
    -- Collapse base to one row per customer and attach the lookup CTEs.
    SELECT max(b.name) AS name,
           b.customer_email,
           max(b.first_name) AS first_name,
           max(b.last_name) AS last_name,
           string_agg(DISTINCT b.store_id :: TEXT, ',') AS store_id,
           max(b.customer_group) AS customer_group,
           string_agg(DISTINCT b.cat_level_1_name, ',') AS cat_level_1,
           string_agg(DISTINCT b.cat_level_2_name, ',') AS cat_level_2,
           string_agg(DISTINCT b.cat_level_3_name, ',') AS cat_level_3,
           string_agg(DISTINCT b.item_sku, ',') AS sku,
           string_agg(DISTINCT b.product_name_sku, ',') AS product_name_sku,
           string_agg(DISTINCT b.shipping_country_id, ',') AS shipping_country_id,
           max(b.default_channel_grouping)
               FILTER (WHERE b.first_order_of_customer) AS first_order_acquisition_channel,
           min(b.created_at_order) AS first_order_date,
           CASE
               WHEN (max(r.days_since_last_order) = max(r.avg_repurchase_time)) THEN 1
               ELSE 0 END :: NUMERIC AS repurchase_delay,
           COALESCE(max(r.reactivation_delay), 0) AS reactivation_delay,
           -- A voucher applies from the average repurchase day up to 16 days after it.
           CASE
               WHEN (max(r.days_since_last_order)
                     BETWEEN max(r.avg_repurchase_time) AND max(r.avg_repurchase_time) + 16)
                   THEN 1
               ELSE 0 END :: NUMERIC AS has_voucher_code,
           CASE
               WHEN (max(r.days_since_last_order)
                     BETWEEN max(r.avg_repurchase_time) AND max(r.avg_repurchase_time) + 16)
                   THEN
                   (current_date - ((max(r.days_since_last_order) - max(r.avg_repurchase_time)) ||
                                    ' days') :: INTERVAL) :: DATE
               ELSE NULL END AS voucher_date,
           count(DISTINCT b.order_id) AS num_orders,
           count(DISTINCT b.order_id) :: NUMERIC /
           floor(((date_part('day', max(b.created_at_order) - min(b.created_at_order)) / 30) +
                  1)) AS orders_per_month,
           sum(b.order_subtotal_net_after_discount)
               FILTER (WHERE b.row_number = 1) AS alltime_rev,
           sum(b.order_subtotal_net_after_discount)
               FILTER (WHERE b.row_number = 1) /
           nullif(count(DISTINCT b.order_id), 0) AS average_rev_per_order,
           min(b.order_subtotal_net_after_discount) AS min_rev,
           max(b.order_subtotal_net_after_discount) AS max_rev,
           max(b.created_at_order) AS last_order_date,
           max(lotv.name) AS last_order_top_value_product_name,
           max(lotv.category) AS last_order_top_value_product_category_name,
           max(lotv.category_request_path) AS last_order_top_value_product_category_url,
           max(lotv.product_request_path) AS last_order_top_value_product_url,
           max(lotv.image) AS last_order_top_value_product_image_url,
           max(b.customer_city) AS customer_city,
           CAST('' AS TEXT) AS "TAGS",
           -- OVER () has no ORDER BY, so the assignment of customers to the
           -- nine buckets is not deterministic between runs.
           NTILE(9) OVER ()::TEXT AS customer_partition,
           max(fotvc.first_order_category) AS first_order_category,
           max(b.order_number)
               FILTER (WHERE b.last_no = 1) AS last_order_number
    FROM base AS b
    LEFT JOIN last_order_top_value AS lotv
           ON b.customer_email = lotv.customer_email
    -- NOTE(review): the sums above assume at most one ol.customers row per
    -- customer_email; duplicates would multiply base rows -- confirm.
    LEFT JOIN repurchase AS r
           ON b.customer_email = r.customer_email
    LEFT JOIN first_order_top_value_category AS fotvc
           ON fotvc.customer_email = b.customer_email
    GROUP BY b.customer_email
),
last_order_category AS (
    -- Categories of every cube row that shares the order_number of the
    -- customer's newest row (base.last_no = 1). The WHERE clause already
    -- restricts to last_no = 1, so the aggregates need no FILTER.
    SELECT c.customer_email,
           string_agg(DISTINCT c.cat_level_1_name, ',') AS last_order_cat_level_1,
           string_agg(DISTINCT c.cat_level_2_name, ',') AS last_order_cat_level_2,
           string_agg(DISTINCT c.cat_level_3_name, ',') AS last_order_cat_level_3
    FROM ol.cube AS c
    INNER JOIN base AS b
            ON b.customer_email = c.customer_email
           AND c.order_number = b.order_number
    WHERE b.last_no = 1
    GROUP BY c.customer_email
)
SELECT fo.name,
       fo.customer_email,
       fo.first_name,
       fo.last_name,
       fo.store_id,
       fo.customer_group,
       fo.cat_level_1,
       fo.cat_level_2,
       fo.cat_level_3,
       fo.sku,
       fo.product_name_sku,
       fo.shipping_country_id,
       fo.first_order_acquisition_channel,
       fo.first_order_date,
       fo.repurchase_delay,
       fo.reactivation_delay,
       fo.has_voucher_code,
       fo.voucher_date,
       fo.num_orders,
       fo.orders_per_month,
       fo.alltime_rev,
       fo.average_rev_per_order,
       fo.min_rev,
       fo.max_rev,
       fo.last_order_date,
       fo.last_order_top_value_product_name,
       fo.last_order_top_value_product_category_name,
       fo.last_order_top_value_product_category_url,
       fo.last_order_top_value_product_url,
       fo.last_order_top_value_product_image_url,
       fo.customer_city,
       -- Map the NTILE bucket (1..9) to its single-letter tag.
       CASE fo.customer_partition
           WHEN '1' THEN 'K'
           WHEN '2' THEN 'D'
           WHEN '3' THEN 'v'
           WHEN '4' THEN 'W'
           WHEN '5' THEN 'J'
           WHEN '6' THEN 'i'
           WHEN '7' THEN 'u'
           WHEN '8' THEN 'n'
           WHEN '9' THEN 'P'
           ELSE '' END AS "TAGS",
       fo.first_order_category,
       fo.last_order_number,
       loc.last_order_cat_level_1,
       loc.last_order_cat_level_2,
       loc.last_order_cat_level_3
FROM final_output AS fo
LEFT JOIN last_order_category AS loc
       ON loc.customer_email = fo.customer_email;
下面是已创建的索引:
-- Index on ol.cube keyed by customer and order time.
-- The name is spelled out (it should match the name PostgreSQL would generate
-- for this column list) so that IF NOT EXISTS can be used: re-running the
-- script no longer creates a second, redundant copy of the index.
-- NOTE(review): the segmentation query reads all of ol.cube and sorts on
-- expressions / DESC keys, so this index is unlikely to be used by it.
CREATE INDEX IF NOT EXISTS cube_customer_email_created_at_order_idx
    ON ol.cube (customer_email, created_at_order);