下面这张表中存放的是原始的行数据:
-- Wide source table: one row per customer_key, carrying four offer slots,
-- each slot stored as an (id, cd, brand_nm) column triple.
CREATE TABLE offer_row_data (
    customer_key     CHAR(20),

    -- offer slot 1
    offer1_id        CHAR(20),
    offer1_cd        CHAR(20),
    offer1_brand_nm  CHAR(20),

    -- offer slot 2
    offer2_id        CHAR(20),
    offer2_cd        CHAR(20),
    offer2_brand_nm  CHAR(20),

    -- offer slot 3
    offer3_id        CHAR(20),
    offer3_cd        CHAR(20),
    offer3_brand_nm  CHAR(20),

    -- offer slot 4
    offer4_id        CHAR(20),
    offer4_cd        CHAR(20),
    offer4_brand_nm  CHAR(20)
);
我需要将这些数据转换后加载到下面这张表中:
-- Target table: one row per offer, identified by a generated surrogate key.
CREATE TABLE offer_data (
    offer_key       INTEGER  NOT NULL AUTO_INCREMENT PRIMARY KEY,
    offer_id        CHAR(20) NOT NULL,
    offer_cd        CHAR(20) NOT NULL,
    offer_brand_nm  CHAR(20)  -- the only nullable offer attribute
);
下面是一些示例数据:
-- Sample rows for offer_row_data: three customers, each with all four offer
-- slots filled. The column list is spelled out so the statement does not
-- depend on the physical column order of the table.
INSERT INTO offer_row_data (
    customer_key,
    offer1_id, offer1_cd, offer1_brand_nm,
    offer2_id, offer2_cd, offer2_brand_nm,
    offer3_id, offer3_cd, offer3_brand_nm,
    offer4_id, offer4_cd, offer4_brand_nm
)
VALUES
    ('1',
     'offer_id_1a', 'offe3_cd_1a', 'offer_nm_1a',
     'offer_id_1b', 'offe3_cd_1b', 'offer_nm_1b',
     'offer_id_1c', 'offe3_cd_1c', 'offer_nm_1c',
     'offer_id_1d', 'offe3_cd_1d', 'offer_nm_1d'),
    ('2',
     'offer_id_2a', 'offe3_cd_2a', 'offer_nm_2a',
     'offer_id_2b', 'offe3_cd_2b', 'offer_nm_2b',
     'offer_id_2c', 'offe3_cd_2c', 'offer_nm_2c',
     'offer_id_2d', 'offe3_cd_2d', 'offer_nm_2d'),
    ('3',
     'offer_id_3a', 'offe3_cd_3a', 'offer_nm_3a',
     'offer_id_3b', 'offe3_cd_3b', 'offer_nm_3b',
     'offer_id_3c', 'offe3_cd_3c', 'offer_nm_3c',
     'offer_id_3d', 'offe3_cd_3d', 'offer_nm_3d');
这是我目前的解决方案:
-- Unpivot the four offer slots of offer_row_data into offer_data, inserting
-- each distinct (id, cd, brand_nm) combination once.
-- UNION (without ALL) already removes duplicate rows from the combined
-- result, so the branches do not need their own DISTINCT.
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer1_id, offer1_cd, offer1_brand_nm
FROM offer_row_data
UNION
SELECT offer2_id, offer2_cd, offer2_brand_nm
FROM offer_row_data
UNION
SELECT offer3_id, offer3_cd, offer3_brand_nm
FROM offer_row_data
UNION
SELECT offer4_id, offer4_cd, offer4_brand_nm
FROM offer_row_data;
由于我的数据集有2500万条记录,这种写法会带来很大的性能负担,因此希望能有一个更高效的解决方案。
示例:
输入:
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
| customer_key | offer1_id | offer1_cd | offer1_brand_nm | offer2_id | offer2_cd | offer2_brand_nm | offer3_id | offer3_cd | offer3_brand_nm | offer4_id | offer4_cd | offer4_brand_nm |
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
| 1 | offer_id_1a | offe3_cd_1a | offer_nm_1a | offer_id_1b | offe3_cd_1b | offer_nm_1b | offer_id_1c | offe3_cd_1c | offer_nm_1c | offer_id_1d | offe3_cd_1d | offer_nm_1d |
| 2 | offer_id_2a | offe3_cd_2a | offer_nm_2a | offer_id_2b | offe3_cd_2b | offer_nm_2b | offer_id_2c | offe3_cd_2c | offer_nm_2c | offer_id_2d | offe3_cd_2d | offer_nm_2d |
| 3 | offer_id_3a | offe3_cd_3a | offer_nm_3a | offer_id_3b | offe3_cd_3b | offer_nm_3b | offer_id_3c | offe3_cd_3c | offer_nm_3c | offer_id_3d | offe3_cd_3d | offer_nm_3d |
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
预期输出:
+-----------+-------------+-------------+----------------+
| offer_key | offer_id | offer_cd | offer_brand_nm |
+-----------+-------------+-------------+----------------+
| 1 | offer_id_1a | offe3_cd_1a | offer_nm_1a |
| 2 | offer_id_2a | offe3_cd_2a | offer_nm_2a |
| 3 | offer_id_3a | offe3_cd_3a | offer_nm_3a |
| 4 | offer_id_1b | offe3_cd_1b | offer_nm_1b |
| 5 | offer_id_2b | offe3_cd_2b | offer_nm_2b |
| 6 | offer_id_3b | offe3_cd_3b | offer_nm_3b |
| 7 | offer_id_1c | offe3_cd_1c | offer_nm_1c |
| 8 | offer_id_2c | offe3_cd_2c | offer_nm_2c |
| 9 | offer_id_3c | offe3_cd_3c | offer_nm_3c |
| 10 | offer_id_1d | offe3_cd_1d | offer_nm_1d |
| 11 | offer_id_2d | offe3_cd_2d | offer_nm_2d |
| 12 | offer_id_3d | offe3_cd_3d | offer_nm_3d |
+-----------+-------------+-------------+----------------+
答案 0 :(得分:0)
如果使用CTE,数据可能只需要读取一次,而不是像原始SQL那样读取4次,因此速度可能会更快。
-- Same unpivot-and-deduplicate insert as the question's statement, except
-- that every UNION branch selects from a CTE wrapping offer_row_data.
-- NOTE(review): whether the engine reads offer_row_data only once for this
-- CTE depends on the optimizer -- confirm with EXPLAIN on the target server.
-- NOTE(review): a later answer in this thread reports this statement shape
-- as a syntax error and wraps the query in parentheses and a derived table;
-- verify before use.
INSERT INTO offer_data (offer_id,offer_cd,offer_brand_nm)
WITH offer_CTE as (SELECT * FROM offer_row_data)
(SELECT distinct offer1_id, offer1_cd, offer1_brand_nm FROM offer_CTE)
UNION
(SELECT distinct offer2_id, offer2_cd, offer2_brand_nm FROM offer_CTE)
UNION
(SELECT distinct offer3_id, offer3_cd, offer3_brand_nm FROM offer_CTE)
UNION
(SELECT distinct offer4_id, offer4_cd, offer4_brand_nm FROM offer_CTE)
请告诉我这是否可以解决您的性能问题。
答案 1 :(得分:0)
我会选择下面这种简单、而且很可能是最快的方法;即使您使用的不是支持CTE的MySQL 8.0 / MariaDB 10.2,它也同样可用:
-- Unpivot one offer slot per statement; needs no CTE support.
-- Each statement copies its slot's columns as-is, so no de-duplication is
-- performed here.
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer1_id, offer1_cd, offer1_brand_nm
FROM offer_row_data;

INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer2_id, offer2_cd, offer2_brand_nm
FROM offer_row_data;

INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer3_id, offer3_cd, offer3_brand_nm
FROM offer_row_data;

INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer4_id, offer4_cd, offer4_brand_nm
FROM offer_row_data;
如果每行中“最多有4个”offer(即部分offer列可能为空),建议在第2个SELECT中加上如下条件:
-- Keep the row unless all three offer2_* columns are NULL (an empty slot).
-- NOTE(review): offer_data declares offer_id and offer_cd as NOT NULL, so a
-- partially filled slot passes this filter but may still be rejected on
-- insert -- confirm that is the intended behavior.
WHERE NOT (
        offer2_id IS NULL
    AND offer2_cd IS NULL
    AND offer2_brand_nm IS NULL
)
(其他几个SELECT也做同样的处理。)
DISTINCT 会使速度变慢,但如果确实需要去重,也可以使用它。或者,使用 INSERT IGNORE,并事先建立某种 UNIQUE 键来拦截重复行。
您可能需要重新考虑新表上是否真的需要 AUTO_INCREMENT。新表中的某一列(或几列的组合)也许可以作为“自然的” PRIMARY KEY?
这也是检查数据类型的好时机。也许您并不需要8字节的 BIGINT,而可以使用4字节的 INT 或3字节的 MEDIUMINT。char(20) 列真的是固定长度吗?它们是 utf8 吗?使用 VARCHAR(20) 和合适的 CHARACTER SET,在空间和性能上可能会明显更好。
新表加载完成后,就可以丢弃旧表了。把一组同类数据(数组)拆散到多个列中存放是一种不好的做法。
答案 2 :(得分:0)
(修正了demicioglu答案中的语法错误)
-- Unpivot the four offer slots through a CTE and insert each distinct
-- (id, cd, brand_nm) combination once.
-- The query keeps the parenthesized form with the UNION wrapped in a derived
-- table, as in the original answer. Columns are listed explicitly instead of
-- SELECT *, and the per-branch DISTINCT is dropped because UNION (without
-- ALL) already removes duplicates.
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm) (
    WITH offer_CTE AS (
        SELECT
            offer1_id, offer1_cd, offer1_brand_nm,
            offer2_id, offer2_cd, offer2_brand_nm,
            offer3_id, offer3_cd, offer3_brand_nm,
            offer4_id, offer4_cd, offer4_brand_nm
        FROM offer_row_data
    )
    SELECT
        unpivoted.offer_id,
        unpivoted.offer_cd,
        unpivoted.offer_brand_nm
    FROM (
        -- The first branch names the columns of the combined result.
        (SELECT
            offer1_id       AS offer_id,
            offer1_cd       AS offer_cd,
            offer1_brand_nm AS offer_brand_nm
         FROM offer_CTE)
        UNION
        (SELECT offer2_id, offer2_cd, offer2_brand_nm FROM offer_CTE)
        UNION
        (SELECT offer3_id, offer3_cd, offer3_brand_nm FROM offer_CTE)
        UNION
        (SELECT offer4_id, offer4_cd, offer4_brand_nm FROM offer_CTE)
    ) AS unpivoted
);
就目前的测试来看,它比我那个使用4条独立INSERT语句的答案要快。不过我不确定它是否真的“只读取一次数据”。