统计并删除单个列中的重复项

时间:2017-03-22 09:19:08

标签: sql-server sql-server-2008

我有一张表,其结构和数据如下:

-- Sample table from the question: one delimited URL list per row, plus the
-- count / de-duplicated-list columns the question wants to fill in.
CREATE TABLE items_table (
    url                 VARCHAR(MAX),  -- list items are separated by ',' or '|'
    counttotal_urls     INT,
    countduplicate_urls INT,
    Unique_urls         VARCHAR(MAX),
    countUnique_urls    INT
);

-- Four sample lists; the last two rows carry the same list.
INSERT INTO items_table (url)
VALUES
    ('ht,ha,hb,ha|hc|hy'),
    ('ht,hb,hb|hb|hx|hx'),
    ('hz,hy,hx,hm|hm,hy'),
    ('hz,hy,hx,hm|hm,hy');
  1. 我需要将,h替换为|h,为此,我将使用replace(url,',h','|h')

  2. 我需要以'|'作为分隔符来拆分字符串

  3. 并据此统计total_urls的数量
  4. 我想检查或统计可能存在的重复网址

  5. 我希望通过删除重复项在Unique_url列中更新unique_urls

  6. 最后,我想统计Unique_urls中网址的数量,并把它更新到另一列(countUnique_urls)中

  7. 期望的输出:

    enter image description here

2 个答案:

答案 0 :(得分:2)

这有点复杂,但我尝试用基于集合的方法来实现它。

你的架构:

nvarchar

我使用了几个CTE和XML方法。先建立如下的表结构和示例数据:

-- Temp-table version of the sample table, with an added identity column `id`.
CREATE TABLE #items_table (
    id                  INT IDENTITY(1, 1),
    url                 VARCHAR(MAX),
    counttotal_urls     INT,
    countduplicate_urls INT,
    Unique_urls         VARCHAR(MAX),
    countUnique_urls    INT
);

-- Load the four sample lists. One statement per row, so identity values are
-- handed out in statement order.
INSERT INTO #items_table (url) VALUES ('ht,ha,hb,ha|hc|hy');
INSERT INTO #items_table (url) VALUES ('ht,hb,hb|hb|hx|hx');
INSERT INTO #items_table (url) VALUES ('hz,hy,hx,hm|hm,hy');
INSERT INTO #items_table (url) VALUES ('hy,hx,hm|hm,hy');

用于得到结果的查询如下:

-- For every row of #items_table, split the delimited url list into tokens and
-- report: total token count, number of repeated tokens, the de-duplicated
-- '|'-separated list, and the number of distinct tokens.
--
-- Rows are keyed by #items_table.id, so two rows holding the same list are
-- reported separately. Tokens are counted after splitting, so their length
-- does not matter. Empty tokens are ignored; a row whose url is NULL or has
-- no non-empty token produces no output row.
;WITH url_as_xml
AS (
    -- Escape XML-special characters first, then normalise ',h' separators to
    -- '|h' and turn every '|' into an element boundary so nodes() can split.
    SELECT it.id
        ,it.url
        ,CAST('<M>'
            + REPLACE(
                REPLACE(
                    REPLACE(REPLACE(REPLACE(it.url, '&', '&amp;'), '<', '&lt;'), '>', '&gt;')
                    , ',h', '|h')
                , '|', '</M><M>')
            + '</M>' AS XML) AS url_xml
    FROM #items_table AS it
    )
,url_token
AS (
    -- One row per (source row, non-empty token).
    SELECT x.id
        ,x.url
        ,pieces.piece.value('.', 'varchar(MAX)') AS token
    FROM url_as_xml AS x
    CROSS APPLY x.url_xml.nodes('/M') AS pieces(piece)
    WHERE pieces.piece.value('.', 'varchar(MAX)') <> ''
    )
SELECT t.url
    ,COUNT(*) AS counttotal_urls
    ,COUNT(*) - COUNT(DISTINCT t.token) AS countduplicate_urls
    -- TYPE + value() returns the concatenation as plain text instead of
    -- entity-encoded XML; STUFF drops the leading '|'.
    ,STUFF((
            SELECT DISTINCT '|' + d.token
            FROM url_token AS d
            WHERE d.id = t.id
            FOR XML PATH(''), TYPE
            ).value('.', 'varchar(MAX)'), 1, 1, '') AS Unique_urls
    ,COUNT(DISTINCT t.token) AS countUnique_urls
FROM url_token AS t
GROUP BY t.id
    ,t.url

答案 1 :(得分:1)

您需要先创建一个字符串拆分的表值函数(我在 aspsnippets 上找到了一个),如下所示:

-- Splits @Input on the single-character delimiter @Character.
--
-- Parameters:
--   @Input     string to split
--   @Character delimiter
-- Returns:
--   @Output(Item) with one row per piece. Empty pieces between two adjacent
--   delimiters are returned as ''. A single trailing delimiter does NOT
--   produce an extra empty row. A NULL @Input returns no rows.
--
-- Created explicitly in the dbo schema because it is invoked as
-- dbo.ufn_SplitString.
CREATE FUNCTION dbo.ufn_SplitString
(    
      @Input NVARCHAR(MAX),
      @Character CHAR(1)
)
RETURNS @Output TABLE (
      Item NVARCHAR(1000)
)
AS
BEGIN
      DECLARE @EndIndex INT

      -- Make sure the string ends with the delimiter so the loop below also
      -- emits the last piece. Only the final character is inspected.
      IF RIGHT(@Input, 1) <> @Character
      BEGIN
            SET @Input = @Input + @Character
      END

      SET @EndIndex = CHARINDEX(@Character, @Input)

      WHILE @EndIndex > 0
      BEGIN
            -- Everything before the first remaining delimiter is the next piece.
            INSERT INTO @Output(Item)
            SELECT SUBSTRING(@Input, 1, @EndIndex - 1)

            -- Drop the emitted piece and its delimiter, then look for the next one.
            SET @Input = SUBSTRING(@Input, @EndIndex + 1, LEN(@Input))
            SET @EndIndex = CHARINDEX(@Character, @Input)
      END

      RETURN
END
GO

函数创建好之后,可以用下面的代码得到所需的结果:

-- Split-function approach on the four sample lists: for each list report the
-- total item count, the number of repeated items, the de-duplicated
-- '|'-separated list and the distinct item count.
-- Requires dbo.ufn_SplitString.
;WITH cte_OriginalTable(url) AS
(
SELECT 'ht,ha,hb,ha|hc|hy' UNION ALL
SELECT 'ht,hb,hb|hb|hx|hx' UNION ALL
SELECT 'hz,hy,hx,hm|hm,hy' UNION ALL
SELECT 'hz,hy,hx,hm|hm,hy'
)
,cte_SeparatorFix AS
(
-- Number the lists (in no particular order) and normalise ',h' to '|h' so
-- '|' is the only delimiter.
SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS ID,
    REPLACE(url, ',h', '|h') AS url
FROM cte_OriginalTable
)
,cte_Split AS
(
-- One row per (list, item).
SELECT o.ID,
    o.url,
    y.Item
FROM cte_SeparatorFix o
CROSS APPLY dbo.ufn_SplitString(o.url, '|') y
)
-- Aggregate straight from the per-item rows; grouping by ID and url yields
-- one row per list without joining the counts back and de-duplicating.
SELECT s.ID,
    s.url AS URLs,
    COUNT(*) AS CountTotal_URLs,
    COUNT(*) - COUNT(DISTINCT s.Item) AS CountDuplicate_URLS,
    -- TYPE + value() returns the concatenation as plain text instead of
    -- entity-encoded XML; STUFF drops the leading '|'.
    STUFF((    SELECT DISTINCT '|' + s1.Item AS [text()]
               FROM cte_Split s1
               WHERE s1.ID = s.ID
               FOR XML PATH(''), TYPE
          ).value('.', 'nvarchar(max)'), 1, 1, '' ) AS Unique_URLs,
    COUNT(DISTINCT s.Item) AS CountUnique_URLs
FROM cte_Split s
GROUP BY s.ID, s.url