我有一张表，其结构和示例数据如下：
-- Sample table: each row holds one delimited list of URLs in `url`.
-- The remaining columns are left NULL by the inserts below.
CREATE TABLE items_table (
    url                 VARCHAR(MAX),
    counttotal_urls     INT,
    countduplicate_urls INT,
    Unique_urls         VARCHAR(MAX),
    countUnique_urls    INT
);

-- Four sample rows; items are separated by either ',' or '|'.
-- The last two rows are intentionally identical.
INSERT INTO items_table (url)
VALUES
    ('ht,ha,hb,ha|hc|hy'),
    ('ht,hb,hb|hb|hx|hx'),
    ('hz,hy,hx,hm|hm,hy'),
    ('hz,hy,hx,hm|hm,hy');
答案 0 :(得分:2)
这有点复杂，但我尝试用基于集合（set-based）的方法来实现它。
你的架构:
nvarchar
我使用了若干 CTE 和 XML 方法。先创建临时表并插入示例数据：
-- Temp-table copy of the sample schema with an added identity column `id`.
-- Only `url` is populated; the other columns stay NULL.
CREATE TABLE #items_table (
    id                  INT IDENTITY(1, 1),
    url                 VARCHAR(MAX),
    counttotal_urls     INT,
    countduplicate_urls INT,
    Unique_urls         VARCHAR(MAX),
    countUnique_urls    INT
);

-- One INSERT per row so identity values follow statement order.
INSERT INTO #items_table (url) VALUES ('ht,ha,hb,ha|hc|hy');
INSERT INTO #items_table (url) VALUES ('ht,hb,hb|hb|hx|hx');
INSERT INTO #items_table (url) VALUES ('hz,hy,hx,hm|hm,hy');
INSERT INTO #items_table (url) VALUES ('hy,hx,hm|hm,hy');
用下面的查询即可得到所需的结果：
-- Per-row URL statistics for #items_table.
-- Output columns: url, counttotal_urls, countduplicate_urls, Unique_urls.
--
-- Rows are keyed by #items_table.id, so two rows holding the same url text
-- are reported separately instead of being merged with inflated counts.
-- Items are counted from the split result, so the query does not depend on
-- item length, collation order, or the size of master.dbo.spt_values.
;WITH normalized AS (
    -- Unify the separators: a ',' immediately followed by 'h' starts a new
    -- item, exactly like '|'. Other commas are left untouched.
    SELECT
        id,
        url,
        REPLACE(url, ',h', '|h') AS url_piped
    FROM #items_table
),
shredded AS (
    -- Turn 'a|b|c' into <M>a</M><M>b</M><M>c</M>.
    -- The inner FOR XML PATH('') entity-encodes &, < and > first, so items
    -- containing those characters do not break the CAST to XML.
    SELECT
        n.id,
        n.url,
        CAST('<M>'
            + REPLACE((SELECT n.url_piped AS [*] FOR XML PATH('')), '|', '</M><M>')
            + '</M>' AS XML) AS xml_fld
    FROM normalized AS n
),
url_items AS (
    -- One row per non-empty item of each source row.
    SELECT
        s.id,
        s.url,
        x.item
    FROM shredded AS s
    CROSS APPLY s.xml_fld.nodes('/M') AS splits(abc)
    CROSS APPLY (SELECT splits.abc.value('.', 'varchar(max)') AS item) AS x
    -- Empty items (leading, trailing or doubled separators) are not URLs.
    WHERE x.item <> ''
)
SELECT
    i.url,
    COUNT(*) AS counttotal_urls,
    COUNT(*) - COUNT(DISTINCT i.item) AS countduplicate_urls,
    STUFF((
        SELECT '|' + i2.item
        FROM url_items AS i2
        WHERE i2.id = i.id
        GROUP BY i2.item              -- de-duplicate
        ORDER BY i2.item              -- deterministic item order
        FOR XML PATH(''), TYPE        -- TYPE + value() undoes entity encoding
    ).value('.', 'varchar(max)'), 1, 1, '') AS Unique_urls
FROM url_items AS i
GROUP BY
    i.id,
    i.url
ORDER BY i.id;
答案 1 :(得分:1)
您需要先创建一个用于拆分字符串的表值函数（我在 aspsnippets 上找到了一个），如下：
-- Splits @Input on the single-character delimiter @Character.
--
-- Parameters:
--   @Input     : the delimited string to split.
--   @Character : the one-character delimiter.
-- Returns:
--   One row per item, in input order of insertion, in column Item.
--   * A single trailing delimiter does not produce an extra empty item
--     ('a|b' and 'a|b|' both yield 'a', 'b').
--   * Empty items between two delimiters are preserved ('a||b' -> 'a', '', 'b').
--   * NULL @Input or NULL @Character yields no rows.
-- Note: Item is NVARCHAR(1000); a longer item does not fit that column.
CREATE FUNCTION ufn_SplitString
(
    @Input NVARCHAR(MAX),
    @Character CHAR(1)
)
RETURNS @Output TABLE (
    Item NVARCHAR(1000)
)
AS
BEGIN
    DECLARE @StartIndex INT, @EndIndex INT

    -- Terminate the input with a delimiter so the last item is emitted by the
    -- loop. RIGHT(@Input, 1) inspects exactly the last character; comparing
    -- the last two characters would never match a one-character delimiter.
    IF RIGHT(@Input, 1) <> @Character
    BEGIN
        SET @Input = @Input + @Character
    END

    -- Walk the string by position instead of re-copying the remaining tail
    -- on every iteration.
    SET @StartIndex = 1
    SET @EndIndex = CHARINDEX(@Character, @Input, @StartIndex)

    WHILE @EndIndex > 0
    BEGIN
        INSERT INTO @Output(Item)
        SELECT SUBSTRING(@Input, @StartIndex, @EndIndex - @StartIndex)

        SET @StartIndex = @EndIndex + 1
        SET @EndIndex = CHARINDEX(@Character, @Input, @StartIndex)
    END

    RETURN
END
GO
函数创建好之后，可以用下面的代码得到所需的结果：
-- Per-row URL statistics built on dbo.ufn_SplitString.
-- Output columns: ID, URLs, CountTotal_URLs, CountDuplicate_URLS,
--                 Unique_URLs, CountUnique_URLs.
--
-- The counts are aggregated once per ID (carrying url along), so the result
-- has exactly one row per source row and needs no SELECT DISTINCT to collapse
-- a join back to the split rows.
;WITH cte_OriginalTable (url) AS
(
    -- Inline sample data; the last two rows are identical on purpose.
    SELECT 'ht,ha,hb,ha|hc|hy' UNION ALL
    SELECT 'ht,hb,hb|hb|hx|hx' UNION ALL
    SELECT 'hz,hy,hx,hm|hm,hy' UNION ALL
    SELECT 'hz,hy,hx,hm|hm,hy'
)
,cte_SeparatorFix AS
(
    -- Number the rows and unify the separators: ',h' starts a new item,
    -- exactly like '|'.
    -- NOTE(review): ORDER BY (SELECT NULL) gives no guaranteed numbering
    -- order; use a real key column when one is available.
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID,
        REPLACE(url, ',h', '|h') AS url
    FROM cte_OriginalTable
)
,cte_Split AS
(
    -- One row per item of each source row.
    SELECT
        o.ID,
        o.url,
        y.Item
    FROM cte_SeparatorFix AS o
    CROSS APPLY dbo.ufn_SplitString(o.url, '|') AS y
)
,cte_TotalCount AS
(
    -- Unique_urls here is the COUNT of distinct items; it is exposed as
    -- CountUnique_URLs in the final select.
    SELECT
        ID,
        url,
        COUNT(ID) AS counttotal_urls,
        COUNT(DISTINCT Item) AS Unique_urls,
        COUNT(ID) - COUNT(DISTINCT Item) AS countduplicate_urls
    FROM cte_Split
    GROUP BY
        ID,
        url
)
SELECT
    a.ID,
    a.url AS URLs,
    a.CountTotal_URLs,
    a.CountDuplicate_URLS,
    STUFF((
        SELECT DISTINCT '|' + b1.Item AS [text()]
        FROM cte_Split AS b1
        WHERE b1.ID = a.ID
        FOR XML PATH(''), TYPE        -- TYPE + value() undoes entity encoding
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS Unique_URLs,
    a.Unique_URLs AS CountUnique_URLs
FROM cte_TotalCount AS a