基本上,我有3个表:customer_profiles_lib,customer_profiles_tmp和customer_duplicates_tmp。
我想检查customer_profiles_lib中的每条记录是否已存在于customer_profiles_tmp中:如果不存在,就将其INSERT到customer_profiles_tmp;如果已存在,就将其INSERT到customer_duplicates_tmp。
我用一个存储过程尝试实现了这个逻辑,但我有900万条记录需要处理,速度太慢了......这是我目前的代码:
-- customerImport: walks every row of customer_profiles_lib and, for each
-- customer_id, copies the newest lib row for that id (highest
-- customer_profile_id) into
--   * customer_duplicates_tmp when the id is already in customer_profiles_tmp,
--   * customer_profiles_tmp otherwise.
-- Takes no parameters and returns no result sets.
--
-- Changes compared with the earlier version:
--   * The loop leaves immediately after the FETCH that reaches end-of-data.
--     The former REPEAT ... UNTIL shape executed the body one extra time with
--     the stale unique_id, so the last customer_id was processed twice.
--   * The per-row "SELECT unique_id AS ..." trace statements were removed:
--     each one sent a separate result set to the client for every row, and
--     their labels did not match the branch they were in.
--   * fin is initialised in its declaration instead of after OPEN.
-- NOTE(review): this is still row-by-row processing; a set-based
-- INSERT ... SELECT is likely to be much faster on large tables.
CREATE DEFINER=`company`@`%` PROCEDURE `customerImport`()
BEGIN
    DECLARE unique_id INT;
    DECLARE fin INT DEFAULT 0;
    DECLARE curs CURSOR FOR
        SELECT customer_id FROM customer_profiles_lib;
    -- Raised by FETCH once the cursor is exhausted.
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET fin = 1;

    OPEN curs;

    import_loop: LOOP
        FETCH curs INTO unique_id;
        -- Test the flag before doing any work so a failed FETCH never
        -- re-processes the previous customer_id.
        IF fin THEN
            LEAVE import_loop;
        END IF;

        IF EXISTS (SELECT 1 FROM customer_profiles_tmp WHERE customer_id = unique_id) THEN
            -- Already imported: record the newest lib row as a duplicate.
            INSERT IGNORE INTO customer_duplicates_tmp
                (first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number)
            SELECT first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number
            FROM customer_profiles_lib
            WHERE customer_id = unique_id
            ORDER BY customer_profile_id DESC
            LIMIT 1;
        ELSE
            -- First time this customer_id is seen: import its newest lib row.
            INSERT IGNORE INTO customer_profiles_tmp
                (first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number)
            SELECT first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number
            FROM customer_profiles_lib
            WHERE customer_id = unique_id
            ORDER BY customer_profile_id DESC
            LIMIT 1;
        END IF;
    END LOOP import_loop;

    CLOSE curs;
END
下面这种方式需要1个小时,插入本身可以正常完成,但不会向customer_duplicates_tmp表中写入任何内容:
-- Bulk-copies every customer_profiles_lib row into customer_profiles_tmp in a
-- single statement. Nothing is written to customer_duplicates_tmp here.
-- NOTE(review): IGNORE presumably relies on a unique key on
-- customer_profiles_tmp to drop repeated customers - confirm against the schema.
INSERT IGNORE INTO customer_profiles_tmp
    (first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number)
SELECT
    lib.first,
    lib.last,
    lib.address_1,
    lib.address_2,
    lib.city,
    lib.state,
    lib.zipcode,
    lib.email,
    lib.customer_id,
    lib.phone,
    lib.store_number
FROM customer_profiles_lib AS lib;
感谢您的帮助!
答案 0 :(得分:1)
看起来,整个逐行处理(RBAR,即 Row By Agonizing Row)的存储过程可以用两条SQL语句代替,性能会得到显著提升:
-- Copies into customer_duplicates_tmp every customer_profiles_lib row whose
-- customer_id is already present in customer_profiles_tmp.
INSERT IGNORE INTO customer_duplicates_tmp
    (first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number)
SELECT
    lib.first,
    lib.last,
    lib.address_1,
    lib.address_2,
    lib.city,
    lib.state,
    lib.zipcode,
    lib.email,
    lib.customer_id,
    lib.phone,
    lib.store_number
FROM customer_profiles_lib AS lib
WHERE EXISTS (
    SELECT 1
    FROM customer_profiles_tmp AS existing
    WHERE existing.customer_id = lib.customer_id
);
-- Copies into customer_profiles_tmp every customer_profiles_lib row whose
-- customer_id is not yet present in customer_profiles_tmp.
-- NOT EXISTS is used instead of NOT IN (subquery): with NOT IN, a single NULL
-- customer_id in customer_profiles_tmp makes the predicate UNKNOWN for every
-- row, so nothing would be inserted at all.
-- With NOT EXISTS, lib rows whose own customer_id is NULL never match and are
-- therefore treated as "not yet present" and inserted.
-- NOTE(review): run this after the customer_duplicates_tmp statement; once
-- these rows are in customer_profiles_tmp they would all look like duplicates.
INSERT IGNORE INTO customer_profiles_tmp
    (first, last, address_1, address_2, city, state, zipcode, email, customer_id, phone, store_number)
SELECT
    lib.first,
    lib.last,
    lib.address_1,
    lib.address_2,
    lib.city,
    lib.state,
    lib.zipcode,
    lib.email,
    lib.customer_id,
    lib.phone,
    lib.store_number
FROM customer_profiles_lib AS lib
WHERE NOT EXISTS (
    SELECT 1
    FROM customer_profiles_tmp AS existing
    WHERE existing.customer_id = lib.customer_id
);