我有下表:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
2 1 A;B;C;C
2 2 X
3 1 X;Y;Z
3 2 V;V;V;V;V;V;V;V;V;V;V
如何从此表中仅选择连接values
中的唯一值?所以:
Object Field Values
---------------------
1 1 A;B
2 1 A;B;C
2 2 X
3 1 X;Y;Z
3 2 V
在任何脚本语言中,我都会遍历来自Values
的值,在;
上爆炸并循环遍历该数组,并使用一些逻辑过滤掉重复项。但是,我只需要使用SQL(Server 2008)来执行此操作。
有人可以告诉我是否以及如何做到这一点?
非常感谢任何帮助: - )
答案 0 :(得分:2)
要执行此操作,请首先创建拆分功能。这是我使用的那个,但是如果你在网上搜索(或者甚至是SO)" SQL Server分割功能"如果你不喜欢这个,你会发现很多选择:
ALTER FUNCTION [dbo].[Split](@StringToSplit NVARCHAR(MAX), @Delimiter NCHAR(1))
RETURNS TABLE
AS
RETURN
(
SELECT ID = ROW_NUMBER() OVER(ORDER BY n.Number),
Position = Number,
Value = SUBSTRING(@StringToSplit, Number, CHARINDEX(@Delimiter, @StringToSplit + @Delimiter, Number) - Number)
FROM ( SELECT TOP (LEN(@StringToSplit) + 1) Number = ROW_NUMBER() OVER(ORDER BY a.object_id)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
) n
WHERE SUBSTRING(@Delimiter + @StringToSplit + @Delimiter, n.Number, 1) = @Delimiter
);
然后你可以分割你的领域,所以运行:
SELECT t.Object, t.Field, s.Value
FROM T
CROSS APPLY dbo.Split(t.[Values], ';') AS s
转过来:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
成:
Object Field Values
------------------------------------
1 1 A
1 1 A
1 1 A
1 1 B
1 1 A
1 1 A
然后您可以应用DISTINCT
运营商:
SELECT DISTINCT t.Object, t.Field, s.Value
FROM T
CROSS APPLY dbo.Split(t.[Values], ';') AS s;
给予:
Object Field Values
------------------------------------
1 1 A
1 1 B
然后,您可以将行连接回一个列,提供最终查询:
SELECT t.Object, t.Field, [Values] = STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
FROM T
CROSS APPLY
( SELECT DISTINCT ';' + s.Value
FROM dbo.Split(t.[Values], ';') AS s
FOR XML PATH(''), TYPE
) AS s (x)
SQL Fiddle似乎已关闭,但是一旦你创建了Split功能,下面就是一个完整的例子:
CREATE TABLE #T (Object INT, Field INT, [Values] VARCHAR(MAX));
INSERT #T
VALUES
(1, 1, 'A;A;A;B;A;A'),
(2, 1, 'A;B;C;C'),
(2, 2, 'X'),
(3, 1, 'X;Y;Z'),
(3, 2, 'V;V;V;V;V;V;V;V;V;V;V');
SELECT t.Object, t.Field, [Values] = STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
FROM #T AS T
CROSS APPLY
( SELECT DISTINCT ';' + s.Value
FROM dbo.Split(t.[Values], ';') AS s
FOR XML PATH(''), TYPE
) AS s (x);
修改强>
根据您的评论,您无法创建表格或修改DDL,我想我会考虑您无法创建功能的情况。您可以将上面的拆分功能扩展到您的查询中,因此您实际上并不需要创建一个功能:
CREATE TABLE #T (Object INT, Field INT, [Values] VARCHAR(MAX));
INSERT #T
VALUES
(1, 1, 'A;A;A;B;A;A'),
(2, 1, 'A;B;C;C'),
(2, 2, 'X'),
(3, 1, 'X;Y;Z'),
(3, 2, 'V;V;V;V;V;V;V;V;V;V;V');
SELECT t.Object,
t.Field,
[Values] = STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
FROM #T AS T
CROSS APPLY
( SELECT DISTINCT ';' + SUBSTRING(t.[Values], Number, CHARINDEX(';', t.[Values] + ';', Number) - Number)
FROM ( SELECT TOP (LEN(t.[Values]) + 1) Number = ROW_NUMBER() OVER(ORDER BY a.object_id)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
) n
WHERE SUBSTRING(';' + t.[Values] + ';', n.Number, 1) = ';'
FOR XML PATH(''), TYPE
) AS s (x);
答案 1 :(得分:2)
这是一个独立的解决方案:
DECLARE @t table(Object int, Field int, [Values] varchar(max))
INSERT @t values
(1, 1, 'A;A;A;B;A;A'),
(2, 1, 'A;B;C;C'),
(3, 1, 'X'),
(4, 1, 'X;Y;Z'),
(5, 1, 'V;V;V;V;V;V;V;V;V;V;V')
SELECT t.Object, t.Field, x.[NewValues]
FROM @t t
CROSS APPLY
(
SELECT STUFF((
SELECT distinct ';'+t.c.value('.', 'VARCHAR(2000)') value
FROM (
SELECT x = CAST('<t>' +
REPLACE([Values], ';', '</t><t>') + '</t>' AS XML)
) a
CROSS APPLY x.nodes('/t') t(c)
for xml path(''), type
).value('.', 'varchar(max)'), 1, 1, '') [NewValues]
) x
结果:
Object Field NewValues
1 1 A;B
2 1 A;B;C
3 1 X
4 1 X;Y;Z
5 1 V
根据@ GarethD的评论,这可能会表现得很慢。
测试数据:
create table #t(Object int identity(1,1), Field int, [Values] varchar(max))
INSERT #t values
(1, 'A;A;A;B;A;A'),(1, 'A;B;C;C'), (1, 'X'), (1, 'X;Y;Z'),(1, 'V;V;V;V;V;V;V;V;V;V;V')
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
性能测试我的脚本:
SELECT t.Object, t.Field, x.[NewValues]
FROM #t t
CROSS APPLY
(
SELECT STUFF((
SELECT distinct ';'+t.c.value('.', 'VARCHAR(2000)') value
FROM (
SELECT x = CAST('<t>' +
REPLACE([Values], ';', '</t><t>') + '</t>' AS XML)
) a
CROSS APPLY x.nodes('/t') t(c)
for xml path(''), type
).value('.', 'varchar(max)'), 1, 1, '') [NewValues]
) x
结果不到1秒
性能测试Garath脚本
(必须编辑testdata以获取所有行。相同的行被视为1行):
WITH CTE AS
( SELECT DISTINCT t.Object, t.Field, s.Value
FROM #T AS T
CROSS APPLY
( SELECT ID = ROW_NUMBER() OVER(ORDER BY n.Number),
Position = Number,
Value = SUBSTRING(t.[Values], Number, CHARINDEX(';', t.[Values] + ';', Number) - Number)
FROM ( SELECT TOP (LEN(t.[Values]) + 1) Number = ROW_NUMBER() OVER(ORDER BY a.object_id)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
) n
WHERE SUBSTRING(';' + t.[Values] + ';', n.Number, 1) = ';'
) AS s
)
SELECT Object,
Field,
[Values] = STUFF((SELECT ';' + Value
FROM CTE AS T2
WHERE T2.Object = T.Object
AND T2.Field = T.Field
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)'), 1, 1, '')
FROM CTE AS T
GROUP BY Object, Field;
结果6秒
如果任何行的值为null,则此脚本也会崩溃。
答案 2 :(得分:0)
就像没有CTE的标量值函数替代......
ALTER FUNCTION [SplitRemoveDupes] (
@String VARCHAR(MAX)
,@Delimiter VARCHAR(5)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE @SplitLength INT
DECLARE @DedupedValues VARCHAR(MAX)
DECLARE @SplittedValues TABLE
(
OccurenceId SMALLINT IDENTITY(1,1),
SplitValue VARCHAR(200)
)
WHILE LEN(@String) > 0
BEGIN
SELECT @SplitLength = (
CASE CHARINDEX(@Delimiter, @String)
WHEN 0
THEN LEN(@String)
ELSE CHARINDEX(@Delimiter, @String) - 1
END
)
INSERT INTO @SplittedValues
SELECT SUBSTRING(@String, 1, @SplitLength)
SELECT @String = (
CASE (LEN(@String) - @SplitLength)
WHEN 0
THEN ''
ELSE RIGHT(@String, LEN(@String) - @SplitLength - 1) END)
END
SET @DedupedValues=(SELECT DISTINCT STUFF((
SELECT DISTINCT (@Delimiter + SplitValue)
FROM @SplittedValues s
ORDER BY (@Delimiter + SplitValue)
FOR XML PATH('')
), 1, 1, '') AS a
FROM @SplittedValues ss)
RETURN @DedupedValues
END
将其称为内联...
SELECT Object, Field, [dbo].[SplitRemoveDupes](Values,';') From Table