SQL - 查找重复值并在字段中删除

时间:2013-09-10 04:34:13

标签: sql sql-server

考虑Article表中名为ArticleCategories的列。目标是删除ArticleCategories列每一行中重复出现的数字。

ArticleCategories
------------
3193;2867;3193;
2871;2923;2923;
3278;3337;3337;
2878;2876;2878;
3720;3680;3680;

有关如何使用SQL Server完成此任务的任何帮助?

期望的结果

ArticleCategories
------------
3193;2867;
2871;2923;
3278;3337;
2878;2876;
3720;3680;

如果有人可以提供帮助,请提前致谢。

7 个答案:

答案 0 :(得分:2)

首先创建函数SplitAndRemoveDuplicates,它将用分隔符拆分给定的字符串,删除重复项,然后返回字符串:

-- Splits @s on @sep, drops empty items and repeated items (keeping the first
-- occurrence of each), and returns the remaining items in their original
-- order, each one followed by @sep.
--
-- Parameters:
--   @sep  separator text (may be longer than one character)
--   @s    the delimited list
-- Returns:
--   NULL when @s or @sep is NULL; '' when the list contains no non-empty item;
--   otherwise e.g. '3193;2867;' for ('3193;2867;3193;', ';').
--
-- The list is walked with CHARINDEX rather than cast to XML so that items
-- containing '&' or '<' neither raise an XML parsing error nor come back
-- entitized, and so that the output order is deterministic.
CREATE FUNCTION [dbo].[SplitAndRemoveDuplicates] (@sep VARCHAR(32), @s VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    IF @s IS NULL OR @sep IS NULL
        RETURN NULL;

    -- Items already emitted; equality follows the column collation.
    DECLARE @seen TABLE (val VARCHAR(MAX));

    DECLARE @result VARCHAR(MAX),
            @item   VARCHAR(MAX),
            @start  BIGINT,
            @hit    BIGINT,
            @sepLen INT;

    SET @result = '';
    SET @start  = 1;
    -- LEN() ignores trailing spaces, so measure the separator with a sentinel appended.
    SET @sepLen = LEN(@sep + 'x') - 1;

    -- @start becomes NULL once the segment after the last separator has been handled.
    WHILE @start IS NOT NULL
    BEGIN
        SET @hit = CHARINDEX(@sep, @s, @start);

        SET @item = CASE
                        WHEN @hit > 0 THEN SUBSTRING(@s, @start, @hit - @start)
                        ELSE SUBSTRING(@s, @start, DATALENGTH(@s))
                    END;

        IF @item <> '' AND NOT EXISTS (SELECT 1 FROM @seen WHERE val = @item)
        BEGIN
            INSERT INTO @seen (val) VALUES (@item);
            SET @result = @result + @item + @sep;
        END

        SET @start = CASE WHEN @hit > 0 THEN @hit + @sepLen END;
    END

    RETURN @result;
END

利用该函数更新现有数据:

-- Rewrites every row of Article (intentionally no WHERE clause): each
-- ArticleCategories value is replaced by the result of
-- dbo.SplitAndRemoveDuplicates called with ';' as the separator.
UPDATE Article
SET ArticleCategories = dbo.[SplitAndRemoveDuplicates](';', ArticleCategories)

现在你有了一个可重复使用的函数。

参考来源:https://stackoverflow.com/a/314917/455770 和 https://stackoverflow.com/a/3822833/455770

答案 1 :(得分:1)

找到了我的问题的解决方案,希望它也能帮助其他人。

-- Removes repeated items from a delimited list.
--
-- Parameters:
--   @List   the delimited list; leading/trailing blanks around the list and
--           around each item are trimmed
--   @Delim  single-character delimiter used both to split and to re-join
-- Returns:
--   the distinct non-empty items in first-occurrence order, joined with
--   @Delim and without a trailing delimiter; NULL when there are no items
--   (including a NULL @List).
CREATE FUNCTION dbo.DistinctList
(
    @List  VARCHAR(MAX),
    @Delim CHAR
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    -- Items already emitted; equality follows the column collation.
    DECLARE @Seen TABLE
    (
        Item VARCHAR(MAX)
    )
    DECLARE @Item VARCHAR(MAX), @Pos INT, @Result VARCHAR(MAX)

    -- Terminate the list so the last item is processed like every other one.
    SET @List = LTRIM(RTRIM(@List)) + @Delim
    SET @Pos = CHARINDEX(@Delim, @List, 1)

    WHILE @Pos > 0
    BEGIN
        SET @Item = LTRIM(RTRIM(LEFT(@List, @Pos - 1)))

        IF @Item <> '' AND NOT EXISTS (SELECT 1 FROM @Seen WHERE Item = @Item)
        BEGIN
            INSERT INTO @Seen (Item) VALUES (@Item)
            -- The result is built here, in list order, instead of with
            -- "SELECT @v = @v + col" over DISTINCT, whose order is undefined.
            SET @Result = COALESCE(@Result + @Delim, '') + @Item
        END

        -- DATALENGTH (not LEN) so trailing blanks never shorten the remainder.
        SET @List = SUBSTRING(@List, @Pos + 1, DATALENGTH(@List))
        SET @Pos = CHARINDEX(@Delim, @List, 1)
    END

    RETURN @Result
END
GO
-- Example call: pass a comma-delimited list and ',' as the delimiter.
SELECT dbo.DistinctList('342,34,456,34,3454,456,aa,bb,cc,aa', ',') AS DistinctList
GO

在此感谢脚本的原作者: http://blog.sqlauthority.com/2009/01/15/sql-server-remove-duplicate-entry-from-comma-delimited-string-udf/

答案 2 :(得分:1)

不使用游标,而是结合了本网站之前两个问题中的做法:

  1. 拆分列(Split Columns)
  2. 把各行重新连接起来(Join The Rows Back)

所以答案是先创建一个字符串拆分函数(string split function):

    -- Splits @delimited on @delimiter and returns one row per segment.
    --
    -- Parameters:
    --   @delimited  text to split
    --   @delimiter  separator text (may be longer than one character)
    -- Returns a table with:
    --   id   1-based position of the segment within @delimited
    --   val  the segment text; empty segments (for example the one after a
    --        trailing delimiter) are returned as ''
    -- No rows are returned when either argument is NULL. An empty delimiter
    -- yields the whole input as a single row.
    --
    -- The string is walked with CHARINDEX rather than cast to XML so that
    -- '&' and '<' in the data do not raise an XML parsing error, Unicode
    -- text is not narrowed to varchar, and id follows the segment order.
    CREATE FUNCTION [dbo].[StringSplit]
    (
      @delimited nvarchar(max),
      @delimiter nvarchar(100)
    ) RETURNS @t TABLE
    (
      id int identity(1,1), -- position of the segment in the input
      val nvarchar(max)
    )
    AS
    BEGIN
      declare @start bigint, @hit bigint, @delimLen int

      if @delimited is not null and @delimiter is not null
      begin
        -- LEN() ignores trailing spaces, so measure with a sentinel appended.
        set @delimLen = len(@delimiter + N'x') - 1
        set @start = 1

        -- @start becomes NULL once the segment after the last delimiter is stored.
        while @start is not null
        begin
          set @hit = charindex(@delimiter, @delimited, @start)

          insert into @t(val)
          values (
            case
              when @hit > 0 then substring(@delimited, @start, @hit - @start)
              else substring(@delimited, @start, len(@delimited + N'x'))
            end
          )

          set @start = case when @hit > 0 then @hit + @delimLen end
        end
      end

      RETURN
    END
    

    查询如下:

    -- Demo: removes repeated numbers inside each ArticleCategories value of a
    -- sample table variable, keeping the first occurrence of each number and
    -- the ';'-terminated format of the input (e.g. '3193;2867;3193;' becomes
    -- '3193;2867;').
    declare @Article table
    (
      ArticleCategories varchar(100)
    )

    insert into @Article (ArticleCategories) values
    ('3193;2867;3193;'),
    ('2871;2923;2923;'),
    ('3278;3337;3337;'),
    ('2878;2876;2878;'),
    ('3720;3680;3680;')

    -- DistinctArticles: one row per (original value, distinct item), remembering
    -- the position at which the item first appears in the original value.
    ;WITH DistinctArticles AS (
        SELECT
            a.ArticleCategories,
            s.val       AS Val,
            MIN(s.id)   AS FirstPos
        FROM @Article AS a
        CROSS APPLY dbo.[StringSplit](a.ArticleCategories, ';') AS s
        WHERE s.val <> ''
        GROUP BY a.ArticleCategories, s.val
    ),
    -- Concatenated: the distinct items of each original value glued back
    -- together in first-occurrence order, each followed by ';' (no spaces).
    Concatenated AS (
        SELECT
            Results.ArticleCategories,
            (
                SELECT CAST(d.Val AS VARCHAR(MAX)) + ';'
                FROM DistinctArticles AS d
                WHERE d.ArticleCategories = Results.ArticleCategories
                ORDER BY d.FirstPos
                FOR XML PATH(''), TYPE
            ).value('(./text())[1]', 'VARCHAR(MAX)') AS DistinctArticleCategories
        FROM DistinctArticles AS Results
        GROUP BY Results.ArticleCategories
    )
    -- Rows are matched on the original text because the sample table has no key.
    UPDATE a
    SET ArticleCategories = c.DistinctArticleCategories
    FROM @Article AS a
    INNER JOIN Concatenated AS c
        ON a.ArticleCategories = c.ArticleCategories
    

答案 3 :(得分:0)

请尝试以下解决方案,并针对您的实际情况进行充分测试,希望对您有所帮助。

-- Walks every ArticleCategories value that contains ';' and rewrites it with
-- repeated items removed. The first occurrence of each item is kept, the
-- original order is preserved, and every kept item is followed by ';'
-- (e.g. '3193;2867;3193;' becomes '3193;2867;').
DECLARE @ArticleCategories    VARCHAR(MAX),
        @individualNum        VARCHAR(MAX),
        @ArticleCategoriesNew VARCHAR(MAX),
        @ArticleCategoriesRem VARCHAR(MAX),
        @sepPos               INT

-- STATIC: iterate over a snapshot, because the loop updates the table it reads.
-- Values without ';' hold at most one item and are left untouched.
DECLARE Cursor_Article CURSOR LOCAL FORWARD_ONLY STATIC READ_ONLY FOR
SELECT DISTINCT ArticleCategories
FROM Article
WHERE ArticleCategories LIKE '%;%'

OPEN Cursor_Article
FETCH NEXT FROM Cursor_Article INTO @ArticleCategories
WHILE @@FETCH_STATUS = 0
BEGIN
   SET @ArticleCategoriesNew = ''
   SET @ArticleCategoriesRem = @ArticleCategories

   -- Terminate the last item so a value such as '1;2;1' is handled like '1;2;1;'.
   IF RIGHT(@ArticleCategoriesRem, 1) <> ';'
       SET @ArticleCategoriesRem = @ArticleCategoriesRem + ';'

   SET @sepPos = CHARINDEX(';', @ArticleCategoriesRem)
   WHILE @sepPos > 0
   BEGIN
       SET @individualNum = LEFT(@ArticleCategoriesRem, @sepPos - 1)
       SET @ArticleCategoriesRem = SUBSTRING(@ArticleCategoriesRem, @sepPos + 1, DATALENGTH(@ArticleCategoriesRem))

       -- Compare whole ';'-delimited items so that '19' is not treated as a
       -- duplicate of '3193'.
       IF @individualNum <> ''
          AND CHARINDEX(';' + @individualNum + ';', ';' + @ArticleCategoriesNew) = 0
           SET @ArticleCategoriesNew = @ArticleCategoriesNew + @individualNum + ';'

       SET @sepPos = CHARINDEX(';', @ArticleCategoriesRem)
   END

   --If ArticleCategories is unique then leave same condition else Fetch primary key in cursor and append in Where condition of Update query
   UPDATE Article
   SET ArticleCategories = @ArticleCategoriesNew
   WHERE ArticleCategories = @ArticleCategories

   FETCH NEXT FROM Cursor_Article INTO @ArticleCategories
END
CLOSE Cursor_Article;
DEALLOCATE Cursor_Article;

答案 4 :(得分:0)

尝试以下解决方案

首先创建一个UDF,如下所示

-- Removes repeated items from a delimited string.
--
-- Parameters:
--   @inputStr  the delimited list
--   @del       separator text (may be longer than one character)
-- Returns:
--   NULL for a NULL @inputStr; otherwise the distinct non-empty items,
--   trimmed, in first-occurrence order, each followed by the separator
--   (e.g. '3193;2867;' for ('3193;2867;3193;', ';')). When @del is NULL or
--   empty the whole input is treated as one item and ';' is appended.
--
-- A CHARINDEX loop is used instead of a recursive CTE so that lists with
-- more than 100 items do not hit the default recursion limit.
CREATE FUNCTION dbo.SplitString2(@inputStr varchar(1000), @del varchar(5))
RETURNS varchar(max)
AS
BEGIN
    IF @inputStr IS NULL
        RETURN NULL

    -- Items already emitted; sized like @inputStr so no item can overflow it.
    DECLARE @seen table(item varchar(1000))
    DECLARE @ret    varchar(max),
            @item   varchar(1000),
            @outSep varchar(5),
            @start  int,
            @hit    int,
            @delLen int

    SET @ret = ''
    SET @start = 1
    -- LEN() ignores trailing spaces, so measure with a sentinel appended.
    SET @delLen = LEN(@del + 'x') - 1
    SET @outSep = CASE WHEN @delLen > 0 THEN @del ELSE ';' END

    -- @start becomes NULL once the segment after the last separator is handled.
    WHILE @start IS NOT NULL
    BEGIN
        SET @hit = CASE WHEN @delLen > 0 THEN CHARINDEX(@del, @inputStr, @start) ELSE 0 END

        SET @item = LTRIM(RTRIM(
                        CASE
                            WHEN @hit > 0 THEN SUBSTRING(@inputStr, @start, @hit - @start)
                            ELSE SUBSTRING(@inputStr, @start, 1000)
                        END))

        -- Skip empty segments (such as the one after a trailing separator)
        -- and anything that was already emitted.
        IF @item <> '' AND NOT EXISTS (SELECT 1 FROM @seen WHERE item = @item)
        BEGIN
            INSERT INTO @seen (item) VALUES (@item)
            SET @ret = @ret + @item + @outSep
        END

        SET @start = CASE WHEN @hit > 0 THEN @hit + @delLen END
    END

    RETURN @ret
END

然后您可以根据您的要求运行以下选择/更新查询

更新查询 - 这将更新表格中的记录

-- Rewrites every row of Article (intentionally no WHERE clause) with the
-- result of dbo.SplitString2 called with ';' as the delimiter.
UPDATE Article
SET ArticleCategories = dbo.SplitString2(ArticleCategories, ';')

选择查询 - 这将仅选择不同的记录

-- Preview only: shows each stored value next to the result of
-- dbo.SplitString2 for it, without modifying the table.
SELECT
    ArticleCategories,
    dbo.SplitString2(ArticleCategories, ';') AS DistinctArticleCategories
FROM Article

答案 5 :(得分:0)

如果保留哪一行无关紧要:

-- Deletes whole duplicate ROWS (it does not touch repeated numbers inside a
-- single value): a row is removed when another row with the same
-- ArticleCategories has a lower %%physloc%%.
-- NOTE(review): %%physloc%% is an undocumented SQL Server virtual column;
-- confirm it behaves as expected on the target version before relying on it.
DELETE FROM Article
WHERE EXISTS (
    SELECT 1
    FROM Article t
    WHERE t.ArticleCategories = Article.ArticleCategories
      AND t.%%physloc%% < Article.%%physloc%%
);

这就是我在SQLite中的做法,不确定我是否匹配MS SQL Server的语法......

答案 6 :(得分:-2)

-- Lists each distinct ArticleCategories string once. This compares whole
-- column values; it does not look at the numbers inside a value.
SELECT DISTINCT
    a.ArticleCategories
FROM Article AS a

OR

-- Same result as SELECT DISTINCT: one row per distinct ArticleCategories string.
SELECT a.ArticleCategories
FROM Article AS a
GROUP BY a.ArticleCategories

这是为了删除重复值

-- Deletes duplicate ROWS, keeping one row per distinct ArticleCategories
-- value. It does not remove repeated numbers inside a single value.
-- Rows are numbered within each group of equal values and every row after
-- the first is deleted; which of the identical rows survives is arbitrary.
-- (A "NOT IN (SELECT MAX(col) ... GROUP BY col)" filter can never match,
-- because every value is the maximum of its own group.)
;WITH RankedArticle AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY ArticleCategories
            ORDER BY (SELECT NULL)
        ) AS rn
    FROM Article
)
DELETE FROM RankedArticle
WHERE rn > 1;