我有 6100 万条带有状态的电子邮件记录,其中存在重复。这些电子邮件需要结合状态进行去重。
我编写了一个存储过程来完成去重,但它的运行时间很长。
如何优化此程序的执行时间?
-- Deduplicates oxy_email into oxy_emails_clean, one row per email address.
--
-- Source rows are visited in ascending id order. The first row seen for an
-- email is inserted into oxy_emails_clean (with its status) and its website_id
-- is recorded in oxy_emails_clean_websites_relation. Any later row for the
-- same email only promotes the stored status to 3 when that row's status is 3.
--
-- Returns: always true.
CREATE OR REPLACE FUNCTION public.load_oxy_emails() RETURNS boolean AS $$
DECLARE
    src         record;                     -- current oxy_email row
    existing_id oxy_emails_clean.id%TYPE;   -- id of the already-loaded row, if any
    new_id      int;                        -- id generated for a newly inserted row
BEGIN
    FOR src IN
        SELECT email, status, website_id
        FROM oxy_email
        ORDER BY id
    LOOP
        SELECT id
        INTO existing_id
        FROM oxy_emails_clean
        WHERE email = src.email;

        -- FOUND reports whether the SELECT returned a row. Testing the fetched
        -- record with IS NOT NULL is wrong here: a row value IS NOT NULL only
        -- when every column is non-NULL, so an existing row with any NULL
        -- column would be treated as missing and inserted a second time.
        IF FOUND THEN
            IF src.status = 3 THEN
                UPDATE oxy_emails_clean
                SET status = 3
                WHERE id = existing_id;
            END IF;
        ELSE
            -- RETURNING hands back the generated id directly, avoiding a
            -- separate currval() lookup.
            INSERT INTO oxy_emails_clean (id, email, status)
            VALUES (nextval('oxy_emails_clean_id_seq'), src.email, src.status)
            RETURNING id INTO new_id;

            INSERT INTO oxy_emails_clean_websites_relation (oxy_emails_clean_id, website_id)
            VALUES (new_id, src.website_id);
        END IF;
    END LOOP;

    RETURN true;
END;
$$
LANGUAGE plpgsql;
答案 0 :(得分:4)
如何优化此程序的执行时间?
不要用循环来做。
逐行处理(row-by-row,也被戏称为 “slow-by-slow”)几乎总是比用单个语句一次性批量处理大量行慢得多。
可以使用单个语句轻松完成状态更改:
-- Promote already-loaded emails to status 3 when at least one source row for
-- the same email has status 3.
-- Rows are matched on email, not id: oxy_emails_clean.id comes from its own
-- sequence and is unrelated to oxy_email.id.
update oxy_emails_clean as oec
set status = 3
where oec.status is distinct from 3  -- skip rows that are already 3 (NULL-safe)
  and exists (
        select 1
        from oxy_email as oe
        where oe.email = oec.email
          and oe.status = 3
  );
行的复制可以使用一组链式 CTE(公用表表达式)在单个语句中完成:
-- Copy every not-yet-loaded email exactly once, using a chain of CTEs.
with to_copy as (
    -- One row per email. The earliest source row (lowest id) supplies
    -- website_id and the default status; the status becomes 3 when any
    -- duplicate of that email has status 3. The window aggregate is evaluated
    -- over all duplicates before DISTINCT ON keeps the first row.
    select distinct on (oe.email)
        oe.email,
        oe.website_id,
        case
            when bool_or(oe.status = 3) over (partition by oe.email) then 3
            else oe.status
        end as status
    from oxy_email as oe
    where oe.email is not null  -- NOTE(review): NULL emails are skipped; they cannot be matched back by email below
      and not exists (
            -- emails already present are handled by the status UPDATE instead
            select 1
            from oxy_emails_clean as oec
            where oec.email = oe.email
      )
    order by oe.email, oe.id
), clean_inserted as (
    insert into oxy_emails_clean (id, email, status)
    select nextval('oxy_emails_clean_id_seq'), tc.email, tc.status
    from to_copy as tc
    returning id, email
)
-- The generated ids are unrelated to oxy_email.id, so map each new row back to
-- its source row through the (now unique) email.
insert into oxy_emails_clean_websites_relation (oxy_emails_clean_id, website_id)
select ci.id, tc.website_id
from clean_inserted as ci
inner join to_copy as tc on tc.email = ci.email;