我有一个包含4列的表,数据看起来像
`cust_id firstname lastname value`
`1 a b ct;ct;ct;dir`
`2 c a ct;ct;ct;ct;ct;ct`
`3 d e ct;ct;ct;dir;st`
我想输出
`cust_id firstname lastname value`
`1 a b ct;dir`
`2 c a ct`
`3 d e ct;dir;st`
每行都有不同数量的重复词。
任何帮助都非常感激。
答案 0 :(得分:3)
借助 Parse/Split 函数和 CROSS APPLY 即可实现。我应该补充一点:如果你不能使用 UDF,解析(Parse)的逻辑也可以很容易地移植到 CROSS APPLY 中。
此外,如果顺序很重要,只要在拼接时按 RetSeq 排序,就能保留各个值首次出现的顺序。
-- Sample data: one ';'-delimited list per customer.
Declare @YourTable table (cust_id int,value varchar(50))
Insert Into @YourTable (cust_id,value) values
 (1,'ct;ct;ct;dir'),
 (2,'ct;ct;ct;ct;ct;ct'),
 (3,'ct;ct;ct;dir;st')

-- For every row: split the list, keep one copy of each token, and re-join
-- the tokens with ';' in order of first appearance.
Select A.*
      ,B.*
 From  @YourTable A
 Cross Apply (
    Select String=Stuff((
                Select ';' + X.RetVal
                 From (
                        -- Group By removes the duplicates; min(RetSeq) records
                        -- the position at which each token first occurred.
                        Select RetVal,RetSeq=min(RetSeq)
                         From  [dbo].[udf-Str-Parse](A.Value,';') C
                         Group By RetVal
                      ) X
                 -- Explicit ordering: without an Order By the concatenation
                 -- order is undefined, so first-appearance order was not
                 -- actually guaranteed.
                 Order By X.RetSeq
                 -- Type + .value() returns the text un-escaped, so tokens
                 -- containing & < > are not turned into XML entities.
                 For XML Path(''),Type
              ).value('.','varchar(max)'),1,1,'')
 ) B
返回
cust_id value String
1 ct;ct;ct;dir ct;dir
2 ct;ct;ct;ct;ct;ct ct
3 ct;ct;ct;dir;st ct;dir;st
如果需要,UDF 的定义如下:
-- Inline table-valued function that splits @String on @Delimiter by way of
-- the xml data type.
--   @String    : text to split.
--   @Delimiter : separator text (up to 10 characters).
-- Returns one row per piece:
--   RetSeq : Row_Number() over an unspecified ordering (Order By (Select null)).
--   RetVal : the piece, with leading and trailing spaces trimmed.
-- The input is first serialized with For XML Path('') and then each
-- delimiter is replaced by '</x><x>' so that every piece becomes its own
-- <x> element.
-- NOTE(review): @Delimiter is searched for in the serialized (XML-escaped)
-- copy of @String, so a delimiter that overlaps an escape sequence - for
-- example ';' when the input contains '&' - looks likely to mis-split.
-- Confirm before relying on it with such input.
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RetSeq,
        LTRIM(RTRIM(piece.el.value('(./text())[1]', 'varchar(max)'))) AS RetVal
    FROM (
        SELECT
            CAST(
                '<x>'
                + REPLACE(
                      (SELECT @String AS [*] FOR XML PATH('')),
                      @Delimiter,
                      '</x><x>'
                  )
                + '</x>'
                AS xml
            ).query('.') AS x
    ) AS doc
    -- Shred the document: one row per <x> element.
    CROSS APPLY doc.x.nodes('x') AS piece(el)
);
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')
另一个 Parse/Split 函数(返回与 XML 版本相同的结果)
-- Inline table-valued function that splits @String on @Delimiter using a
-- generated list of character positions instead of XML.
--   @String    : text to split.
--   @Delimiter : separator text (up to 10 characters).
-- Returns one row per piece:
--   RetSeq : 1-based ordinal, numbered by the piece's start position.
--   RetVal : the piece exactly as it appears in @String (no trimming).
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (@String varchar(max),@Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    WITH
    -- Ten constant rows used as the seed for the position list.
    ten(N) AS (
        SELECT 1
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS seed(N)
    ),
    -- Positions 1..DataLength(@String); the four-way cross join can supply
    -- at most 10 * 10 * 10 * 10 = 10,000 of them.
    positions(N) AS (
        SELECT TOP (ISNULL(DATALENGTH(@String), 0))
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM ten AS d1
        CROSS JOIN ten AS d2
        CROSS JOIN ten AS d3
        CROSS JOIN ten AS d4
    ),
    -- Start of every piece: position 1, plus the position right after each
    -- occurrence of the delimiter.
    starts(N) AS (
        SELECT 1
        UNION ALL
        SELECT p.N + DATALENGTH(@Delimiter)
        FROM positions AS p
        WHERE SUBSTRING(@String, p.N, DATALENGTH(@Delimiter)) = @Delimiter
    ),
    -- Length of every piece: distance to the next delimiter, or 8000 when
    -- no further delimiter is found (i.e. the last piece).
    spans(N, L) AS (
        SELECT
            s.N,
            ISNULL(NULLIF(CHARINDEX(@Delimiter, @String, s.N), 0) - s.N, 8000)
        FROM starts AS s
    )
    SELECT
        ROW_NUMBER() OVER (ORDER BY sp.N) AS RetSeq,
        SUBSTRING(@String, sp.N, sp.L)    AS RetVal
    FROM spans AS sp
);
--Original Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Much faster than str-Parse, but limited to 8K
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-8K]('John||Cappelletti||was||here','||')
答案 1 :(得分:0)
如果您希望将相邻重复出现的 `ct;` 合并为单个实例,则可以执行以下操作:
-- Collapse runs of adjacent 'ct;' tokens into a single 'ct;'.
-- A ';' is appended first so that the final token is terminated as well;
-- otherwise a trailing 'ct' is never folded into the run before it
-- ('ct;ct;ct' came back as 'ct;ct'). The appended ';' is then removed again
-- by stripping the last character (reverse / stuff / reverse).
-- '<' and '>' are placeholder characters and are assumed not to occur in col.
select reverse(stuff(reverse(
           replace(replace(replace(col + ';', 'ct;', '><'), '<>', ''), '><', 'ct;')
       ), 1, 1, ''))
这假定“<”和“>”不会出现在该列中。任何两个不会出现在数据中的字符都可用于此目的。
您也可以将其用在 `update` 语句中。
注意:如果这些值是某种代码,那么您应该将数据规范化。不过,在不适合规范化的其他场景中也确实会遇到这类问题(例如,删除字符串中的连续空格)。
答案 2 :(得分:0)
首先要指出的是,如果您以规范化的方式存储数据,就不会遇到这个问题。最好的做法是使用单独的一对多表,例如:
**CustomerValues**
Cust_ID Value
-------------------
1 ct
1 ct
1 ct
1 dir
2 ct
2 ct
.....
您的查询将变为类似:
-- Sample data standing in for the customer table and its one-to-many
-- values table.
WITH Customers (cust_id, firstname, lastname) AS
(
    SELECT src.cust_id, src.firstname, src.lastname
    FROM (VALUES
            (1, 'a', 'b'),
            (2, 'c', 'a'),
            (3, 'd', 'e')
         ) AS src (cust_id, firstname, lastname)
),
CustomerValues (cust_id, value) AS
(
    SELECT src.cust_id, src.value
    FROM (VALUES
            (1, 'ct'), (1, 'ct'), (1, 'ct'), (1, 'dir'),
            (2, 'ct'), (2, 'ct'), (2, 'ct'), (2, 'ct'), (2, 'ct'), (2, 'ct'),
            (3, 'ct'), (3, 'ct'), (3, 'ct'), (3, 'dir'), (3, 'st')
         ) AS src (cust_id, value)
)
-- End of sample data.
-- One row per customer with that customer's distinct values joined by ';'.
SELECT
    cust.cust_id,
    cust.firstname,
    cust.lastname,
    -- STUFF drops the leading ';' produced by the concatenation.
    STUFF(agg.value_list.value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS value
FROM Customers AS cust
CROSS APPLY
(
    -- Concatenate ';' + value for each distinct value of this customer.
    SELECT DISTINCT ';' + cv.value
    FROM CustomerValues AS cv
    WHERE cv.cust_id = cust.cust_id
    FOR XML PATH(''), TYPE
) AS agg (value_list);
有关行如何连接的更多信息,请参阅Grouped Concatenation in SQL Server
如果没有这种格式的数据,您需要执行某种分割。有关详情,请参阅Split strings the right way – or the next best way
-- Sample data: customers carrying a ';'-delimited list in the value column.
WITH Customers (cust_id, firstname, lastname, value) AS
(
    SELECT src.cust_id, src.firstname, src.lastname, src.value
    FROM (VALUES
            (1, 'a', 'b', 'ct;ct;ct;dir'),
            (2, 'c', 'a', 'ct;ct;ct;ct;ct;ct'),
            (3, 'd', 'e', 'ct;ct;ct;dir;st')
         ) AS src (cust_id, firstname, lastname, value)
),
-- Numbers 1..1000 (10 x 10 x 10 rows), used as character positions.
Numbers (Number) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY d1.N)
    FROM       (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS d1 (N)
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS d2 (N)
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS d3 (N)
),
-- One row per token: a token starts at position n of value whenever
-- character n of (';' + value) is a ';'.
CustomerValues AS
(
    SELECT
        cust.cust_id,
        -- Token text runs from its start up to the next ';' (a ';' is
        -- appended so the last token is terminated too).
        SUBSTRING(
            cust.value,
            num.Number,
            CHARINDEX(';', cust.value + ';', num.Number) - num.Number
        ) AS value
    FROM Customers AS cust
    INNER JOIN Numbers AS num
        ON num.Number <= CONVERT(INT, LEN(cust.value))
       AND SUBSTRING(';' + cust.value, num.Number, 1) = ';'
)
-- One row per customer with that customer's distinct tokens joined by ';'.
SELECT
    cust.cust_id,
    cust.firstname,
    cust.lastname,
    -- STUFF drops the leading ';' produced by the concatenation.
    STUFF(agg.value_list.value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS value
FROM Customers AS cust
CROSS APPLY
(
    SELECT DISTINCT ';' + cv.value
    FROM CustomerValues AS cv
    WHERE cv.cust_id = cust.cust_id
    FOR XML PATH(''), TYPE
) AS agg (value_list);
答案 3 :(得分:0)
您可以采取以下措施:
WITH
-- Sample data: customers carrying a ';'-delimited list in YourValue.
CTE_Sample AS
(
SELECT 1 AS cust_id, 'a' AS firstname, 'b' AS lastname, 'ct;ct;ct;dir' AS YourValue UNION ALL
SELECT 2 AS cust_id, 'c' AS firstname, 'a' AS lastname, 'ct;ct;ct;ct;ct;ct' AS YourValue UNION ALL
SELECT 3 AS cust_id, 'd' AS firstname, 'e' AS lastname, 'ct;ct;ct;dir;st' AS YourValue
),
--
-- Split each list into rows, keeping one row per (list, distinct token).
CTE_Split AS
(
SELECT DISTINCT
SS.YourValue
,SV.value AS Val
FROM CTE_Sample SS
CROSS APPLY STRING_SPLIT(SS.YourValue, ';') SV
)
SELECT
SA.cust_id
,SA.firstname
,SA.lastname
-- Re-join the distinct tokens of this row's list into one ';'-separated
-- column; STUFF removes the leading ';'.
,STUFF((
SELECT ';' + SP.Val
FROM CTE_Split SP
-- Correlated on the list text itself, so rows holding identical lists
-- share the same token set.
WHERE SP.YourValue = SA.YourValue
-- Explicit ordering makes the concatenated result deterministic.
ORDER BY SP.Val
-- TYPE + .value() returns the text un-escaped, so tokens containing
-- & < > are not turned into XML entities.
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)'), 1, 1, ''
) AS Val
FROM CTE_Sample SA