我在表1中有数千行,其中包含许多未知和无效的子串。每个子字符串用逗号分隔,数据不区分大小写。
表1(包含无效的子串)
Row COLUMN_A
1 Beta, gamma, eta, lambda, good
2 Alpha, beta,theta, zeta
3 Alpha, sigma, beta, morning
4 Delta, gamma, zeta, etc
Etc…
表2(包含所有有效的子串)
COLUMN_B
alpha
beta
gamma
theta
lambda
将 COLUMN_A 每行中的每个子字符串与 COLUMN_B 中的所有值进行比较,只保留在 COLUMN_B 中出现的有效子串。
答案 0 :(得分:2)
试试这个 -
**查询:**
-- Sample input: one comma-separated list of substrings per row.
DECLARE @string TABLE
(
    RowID INT,
    Value NVARCHAR(50)
);

-- Load the four example rows through a derived VALUES table.
INSERT INTO @string (RowID, Value)
SELECT
    src.RowID,
    src.Value
FROM (
    VALUES
        (1, 'Beta, gamma, eta, lambda, good'),
        (2, 'Alpha, beta,theta, zeta'),
        (3, 'Alpha, sigma, beta, morning'),
        (4, 'Delta, gamma, zeta, etc')
) AS src (RowID, Value);
-- Lookup list: the only substrings that count as valid.
DECLARE @valid_substring TABLE
(
    Value NVARCHAR(20)
);

INSERT INTO @valid_substring (Value)
VALUES
    ('alpha'),
    ('beta'),
    ('gamma'),
    ('theta'),
    ('lambda');
-- Split every @string.Value on commas, keep only the tokens that appear in
-- @valid_substring (compared case-insensitively), and re-join the surviving
-- tokens per row in their original left-to-right order.
-- Rows in which no token is valid do not appear in the result.
;WITH delimited AS
(
    -- Prepend a comma so that every token, including the first one,
    -- is immediately preceded by a delimiter.
    SELECT
          s.RowID
        , Value = ',' + s.Value
    FROM @string AS s
),
valid_token AS
(
    SELECT
          d.RowID
        , token_pos = n.number   -- position of the comma that precedes the token
        , token     = tok.token
    FROM delimited AS d
    -- One row per comma position in the prefixed string.
    -- NOTE(review): this relies on the type 'P' rows of spt_values covering
    -- every character position of Value; ample for NVARCHAR(50) - confirm the
    -- available number range before using this on much longer strings.
    INNER JOIN [master].dbo.spt_values AS n
        ON n.number <= LEN(d.Value) - 1
       AND SUBSTRING(d.Value, n.number, 1) = ','
    CROSS APPLY (
        SELECT next_comma = CHARINDEX(',', d.Value, n.number + 1)
    ) AS nc
    CROSS APPLY (
        -- If another comma follows, take exactly the characters in between.
        -- Otherwise take the rest of the string: LEN(d.Value) is always long
        -- enough, and SUBSTRING simply stops at the end of the string.
        -- The length is never negative, so the expression is safe even if it
        -- is evaluated for number rows that the join later discards.
        SELECT token = LTRIM(RTRIM(SUBSTRING(
              d.Value
            , n.number + 1
            , CASE
                  WHEN nc.next_comma > 0 THEN nc.next_comma - n.number - 1
                  ELSE LEN(d.Value)
              END)))
    ) AS tok
    -- 'P' in upper case so the filter also matches under a case-sensitive collation.
    WHERE n.[type] = 'P'
      -- EXISTS instead of JOIN: a duplicated or differently-cased entry in the
      -- lookup table must not duplicate the token in the output.
      AND EXISTS (
            SELECT 1
            FROM @valid_substring AS vs
            WHERE LOWER(vs.Value) = LOWER(tok.token)
      )
)
SELECT
      r.RowID
    , Value = STUFF((
          SELECT ', ' + vt.token
          FROM valid_token AS vt
          WHERE vt.RowID = r.RowID
          ORDER BY vt.token_pos   -- keep the tokens in their original order
          -- TYPE + .value() avoids XML entity escaping of characters such as & or <.
          FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, '')
FROM (
    SELECT DISTINCT RowID
    FROM valid_token
) AS r
ORDER BY r.RowID;
**输出:**
RowID Value
----------- ---------------------
1 Beta, gamma, lambda
2 Alpha, beta, theta
3 Alpha, beta
4 gamma