我有一张表,其中包含两个 varchar 列:col_name1 和 col_name2,数据如下:
(1, 'hello world', 'hello test'),
(2, 'the stack over', 'over the flow'),
(3, 'hello from my sql fiddle', 'hello my sql');
请参阅SQLFIDDLE MTA
我正在寻找一种方法,找出同一行中同时出现在两列里的重复单词,并把这些单词从 col_name1 中删除。
也就是说,在 MySQL 中执行 UPDATE ... SET 操作之后,col_name1 应该只包含如下所示的单词:
(1, 'world', 'hello test'),
(2, 'stack', 'over the flow'),
(3, 'from fiddle', 'hello my sql');
答案 0 :(得分:2)
以下是您问题的解决方案:
解决问题陈述的SQL:
-- Remove from col_name1 every word that also occurs in col_name2 of the same row.
-- Words are the space-separated tokens of each column. Works without CTEs
-- (pre-8.0 MySQL) by joining against a generated numbers table.
--
-- Differences from the naive version:
--   * word order is preserved explicitly (GROUP_CONCAT ... ORDER BY position);
--   * SEPARATOR ' ' is used instead of REPLACE(',', ' '), so commas inside the
--     data are left untouched;
--   * a word counts as a duplicate only if it occurs in col_name2, so a word
--     repeated inside col_name1 alone is kept;
--   * rows whose words are all duplicates are set to '' (LEFT JOIN + COALESCE)
--     instead of being silently skipped.
--
-- Limits: the numbers table covers positions 1..100, so only the first 100
-- words of each column are considered. GROUP_CONCAT output is truncated at
-- group_concat_max_len (raise it for long texts).
-- NOTE(review): assumes id identifies a single row of table_name - confirm.
UPDATE table_name AS target
LEFT JOIN (
    -- One row per id: the surviving col_name1 words, re-joined in original order.
    SELECT
        src.id,
        GROUP_CONCAT(src.w ORDER BY src.pos SEPARATOR ' ') AS w
    FROM (
        -- Split col_name1 into one (id, pos, w) row per space-separated token.
        SELECT
            t.id,
            n.x AS pos,
            SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', n.x), ' ', -1) AS w
        FROM (SELECT id, col_name1 AS c FROM table_name) AS t
        INNER JOIN (
            -- Numbers 1..100 built from two digit tables.
            SELECT 1 + a.i + b.i * 10 AS x
            FROM (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS a
            CROSS JOIN (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS b
        ) AS n
            -- token count = number of spaces + 1; keep only positions that exist
            ON (LENGTH(t.c) + 1 - LENGTH(REPLACE(t.c, ' ', ''))) >= n.x
    ) AS src
    WHERE NOT EXISTS (
        SELECT 1
        FROM (
            -- Split col_name2 the same way; only the words themselves are needed.
            SELECT
                t.id,
                SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', n.x), ' ', -1) AS w
            FROM (SELECT id, col_name2 AS c FROM table_name) AS t
            INNER JOIN (
                SELECT 1 + a.i + b.i * 10 AS x
                FROM (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS a
                CROSS JOIN (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS b
            ) AS n
                ON (LENGTH(t.c) + 1 - LENGTH(REPLACE(t.c, ' ', ''))) >= n.x
        ) AS other
        WHERE other.id = src.id
          AND other.w = src.w
    )
    GROUP BY src.id
) AS kept
    ON kept.id = target.id
-- No surviving words (kept.w IS NULL) means every word was a duplicate.
SET target.col_name1 = COALESCE(kept.w, '')
-- Rows with a NULL col_name1 have nothing to strip; leave them NULL.
WHERE target.col_name1 IS NOT NULL;
mysql> create table table_name(id int, col_name1 varchar(200),col_name2 varchar(200));
Query OK, 0 rows affected (0.36 sec)
mysql> insert into table_name values
-> (1, 'hello world', 'hello test'),
-> (2, 'the stack over', 'over the flow'),
-> (3, 'hello from my sql fiddle', 'hello my sql');
Query OK, 3 rows affected (0.11 sec)
Records: 3 Duplicates: 0 Warnings: 0
mysql> update table_name x3
-> join (
-> select id,replace(group_concat(w),',',' ') w from (SELECT id,SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', x.x), ' ', -1) w
-> FROM (SELECT id,concat(col_name1) c FROM table_name) t
-> INNER JOIN
-> (
-> SELECT 1 + a.i + b.i * 10 x
-> FROM (SELECT 0 AS i UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a
-> CROSS JOIN (SELECT 0 AS i UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) b
-> ) x
-> ON (LENGTH(t.c) +1 - LENGTH(REPLACE(t.c, ' ', ''))) >= x.x
-> group by id,SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', x.x), ' ', -1)) x2
-> where not exists (select 1 from (SELECT id,SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', x.x), ' ', -1) w
-> FROM (SELECT id,concat(col_name2,' ',col_name1) c FROM table_name) t
-> INNER JOIN
-> (
-> SELECT 1 + a.i + b.i * 10 x
-> FROM (SELECT 0 AS i UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a
-> CROSS JOIN (SELECT 0 AS i UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) b
-> ) x
-> ON (LENGTH(t.c) +1 - LENGTH(REPLACE(t.c, ' ', ''))) >= x.x
-> group by id,SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', x.x), ' ', -1)
-> having count(1) > 1) x1 where x2.id = x1.id and x2.w = x1.w)
-> group by id
-> ) x
-> on x3.id = x.id
-> set x3.col_name1 = x.w;
Query OK, 3 rows affected (0.13 sec)
Rows matched: 3 Changed: 3 Warnings: 0
mysql> select * from table_name;
+------+-------------+---------------+
| id | col_name1 | col_name2 |
+------+-------------+---------------+
| 1 | world | hello test |
| 2 | stack | over the flow |
| 3 | from fiddle | hello my sql |
+------+-------------+---------------+
3 rows in set (0.00 sec)
希望它能解决你的问题。一切顺利!
如果列中的单词可能超过 100 个,可以使用下面这个加入了更多数字表交叉连接的版本:
-- Remove from col_name1 every word that also occurs in col_name2 of the same row.
-- Same approach as the 100-word variant, but the numbers table covers
-- positions 1..10000 so longer texts are handled.
--
-- Fixes relative to the previous form of this statement:
--   * digit tables c and d now contribute to the generated number
--     (+ c.i * 100 + d.i * 1000). Before, they were cross-joined but unused,
--     so every position 1..100 appeared 100 times; the HAVING COUNT(1) > 1
--     duplicate test was then true for every word and nothing was updated;
--   * word order is preserved explicitly (GROUP_CONCAT ... ORDER BY position)
--     and SEPARATOR ' ' replaces REPLACE(',', ' '), so commas in the data survive;
--   * a word counts as a duplicate only if it occurs in col_name2;
--   * rows whose words are all duplicates are set to '' instead of being skipped.
--
-- Limits: GROUP_CONCAT output is truncated at group_concat_max_len (raise it
-- for long texts).
-- NOTE(review): assumes id identifies a single row of table_name - confirm.
UPDATE table_name AS target
LEFT JOIN (
    -- One row per id: the surviving col_name1 words, re-joined in original order.
    SELECT
        src.id,
        GROUP_CONCAT(src.w ORDER BY src.pos SEPARATOR ' ') AS w
    FROM (
        -- Split col_name1 into one (id, pos, w) row per space-separated token.
        SELECT
            t.id,
            n.x AS pos,
            SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', n.x), ' ', -1) AS w
        FROM (SELECT id, col_name1 AS c FROM table_name) AS t
        INNER JOIN (
            -- Numbers 1..10000 built from four digit tables.
            SELECT 1 + a.i + b.i * 10 + c.i * 100 + d.i * 1000 AS x
            FROM (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS a
            CROSS JOIN (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS b
            CROSS JOIN (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS c
            CROSS JOIN (
                SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
            ) AS d
        ) AS n
            -- token count = number of spaces + 1; keep only positions that exist
            ON (LENGTH(t.c) + 1 - LENGTH(REPLACE(t.c, ' ', ''))) >= n.x
    ) AS src
    WHERE NOT EXISTS (
        SELECT 1
        FROM (
            -- Split col_name2 the same way; only the words themselves are needed.
            SELECT
                t.id,
                SUBSTRING_INDEX(SUBSTRING_INDEX(t.c, ' ', n.x), ' ', -1) AS w
            FROM (SELECT id, col_name2 AS c FROM table_name) AS t
            INNER JOIN (
                SELECT 1 + a.i + b.i * 10 + c.i * 100 + d.i * 1000 AS x
                FROM (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS a
                CROSS JOIN (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS b
                CROSS JOIN (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS c
                CROSS JOIN (
                    SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
                    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
                ) AS d
            ) AS n
                ON (LENGTH(t.c) + 1 - LENGTH(REPLACE(t.c, ' ', ''))) >= n.x
        ) AS other
        WHERE other.id = src.id
          AND other.w = src.w
    )
    GROUP BY src.id
) AS kept
    ON kept.id = target.id
-- No surviving words (kept.w IS NULL) means every word was a duplicate.
SET target.col_name1 = COALESCE(kept.w, '')
-- Rows with a NULL col_name1 have nothing to strip; leave them NULL.
WHERE target.col_name1 IS NOT NULL;
答案 1 :(得分:1)
如果您使用的是MySQL 8.0,则可以使用递归公用表表达式(CTE)迭代字符串以查找所有单词。以下UPDATE语句应该完成工作:
-- Remove from col_name1 every whole word that also occurs in col_name2 of the
-- same row, using recursive CTEs (MySQL 8.0+).
--
-- Fixes relative to the previous form of this statement:
--   * words are matched on word boundaries (both sides padded with spaces)
--     instead of as raw substrings, so 'he' is no longer removed just because
--     col_name2 contains 'hello';
--   * a NULL col_name2 is treated as "no words" instead of making the match
--     NULL and wiping col_name1;
--   * rows whose words are all duplicates are set to '' rather than NULL, and
--     rows whose col_name1 is already NULL are left alone.
--
-- Limits: one recursion step per space, so texts with more spaces than
-- cte_max_recursion_depth need that variable raised; GROUP_CONCAT output is
-- truncated at group_concat_max_len.
-- NOTE(review): assumes id identifies a single row of table_name - confirm.
WITH RECURSIVE
-- For each row, the i-th token of col_name1 starts at start_pos; stop_pos is
-- the position of the space that ends it, or 0 for the last token.
word_boundaries (id, i, start_pos, stop_pos) AS (
    SELECT id, 1, 1, LOCATE(' ', col_name1, 1)
    FROM table_name
    UNION ALL
    SELECT id, i + 1, stop_pos + 1, LOCATE(' ', col_name1, stop_pos + 1)
    FROM word_boundaries
    INNER JOIN table_name USING (id)
    WHERE stop_pos != 0
),
-- Cut each token out of col_name1 using the boundaries found above.
words (id, i, word) AS (
    SELECT
        id,
        i,
        CASE
            WHEN stop_pos != 0 THEN SUBSTRING(col_name1, start_pos, stop_pos - start_pos)
            ELSE SUBSTRING(col_name1, start_pos)
        END
    FROM table_name
    INNER JOIN word_boundaries USING (id)
),
-- Keep only the tokens that do not appear as a whole word in col_name2.
unique_words (id, i, word) AS (
    SELECT id, i, word
    FROM words
    INNER JOIN table_name USING (id)
    -- empty tokens (from consecutive spaces) were always dropped; keep doing so
    WHERE word <> ''
      AND LOCATE(
              CONCAT(' ', word, ' '),
              CONCAT(' ', COALESCE(col_name2, ''), ' ')
          ) = 0
),
-- Re-join the surviving tokens of each row in their original order.
new_text (id, new_colname1) AS (
    SELECT id, GROUP_CONCAT(word ORDER BY i SEPARATOR ' ')
    FROM unique_words
    GROUP BY id
)
UPDATE table_name
SET col_name1 = COALESCE(
    (SELECT new_text.new_colname1 FROM new_text WHERE new_text.id = table_name.id),
    ''  -- no surviving tokens: every word was a duplicate
)
-- Rows with a NULL col_name1 have nothing to strip; leave them NULL.
WHERE col_name1 IS NOT NULL;
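此UPDATE语句使用多个CTE:
- word_boundaries:递归 CTE,按空格逐个定位 col_name1 中每个单词的起止位置;
- words:根据这些位置从 col_name1 中截取出各个单词;
- unique_words:只保留没有出现在 col_name2 中的单词;
- new_text:按原有顺序把每一行保留下来的单词用空格重新拼接起来。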
如果您的字符串可能包含超过1000个空格字符,则必须增加cte_max_recursion_depth的值。将table_name.id定义为PRIMARY KEY应该可以在表大时加快速度。
我认为这个例子展示了CTE在将复杂任务分解为更简单步骤方面的有用性。