如何在列中查找重复文本并删除sql server中的重复项

时间:2016-09-09 07:11:27

标签: sql sql-server

在以下示例中

address: AUNDH AUNDH CAMP

我想删除重复的部分,结果必须是

address: AUNDH CAMP

如何在sql server中执行此操作?

6 个答案:

答案 0 :(得分:2)

您可以创建此函数:

-- Removes repeated items from a delimited string, keeping the first
-- occurrence of every item in its original position.
--
-- Parameters:
--   @StringList  delimited list to clean up, e.g. 'AUNDH AUNDH CAMP'
--   @Delim       single-character delimiter that separates the items
-- Returns:
--   The distinct items joined by @Delim in first-seen order, or NULL when
--   @StringList is NULL or contains no non-empty item.
CREATE FUNCTION dbo.RemoveDuplicate
(
    @StringList VARCHAR(MAX),
    @Delim      CHAR
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    -- Items that were already appended to the result; used for the duplicate check only.
    DECLARE @Seen TABLE
    (
        Item VARCHAR(MAX)
    )

    DECLARE @Item VARCHAR(MAX), @Pos INT, @Result VARCHAR(MAX)

    -- A trailing delimiter lets the loop treat the last item like all the others.
    SET @StringList = LTRIM(RTRIM(@StringList)) + @Delim
    SET @Pos = CHARINDEX(@Delim, @StringList, 1)

    WHILE @Pos > 0
    BEGIN
        SET @Item = LTRIM(RTRIM(LEFT(@StringList, @Pos - 1)))

        -- Skip empty items (consecutive delimiters) and items seen before.
        IF @Item <> '' AND NOT EXISTS (SELECT 1 FROM @Seen WHERE Item = @Item)
        BEGIN
            INSERT INTO @Seen (Item) VALUES (@Item)

            -- Appending here, in encounter order, keeps the original word order;
            -- SELECT DISTINCT without ORDER BY gives no ordering guarantee.
            SET @Result = COALESCE(@Result + @Delim, '') + @Item
        END

        -- Drop the consumed item plus its delimiter and look for the next one.
        SET @StringList = SUBSTRING(@StringList, @Pos + 1, LEN(@StringList))
        SET @Pos = CHARINDEX(@Delim, @StringList, 1)
    END

    RETURN @Result
END
GO

然后像这样使用它:

 -- Example call: collapse the repeated word in a sample address.
 DECLARE @address VARCHAR(300) = 'AUNDH AUNDH CAMP'
 SELECT dbo.RemoveDuplicate(@address, ' ') -- the delimiter is a single space

答案 1 :(得分:0)

如果您使用的是 SQL Server 2016 或更高版本,可以使用 STRING_SPLIT。

按空格拆分列值并选择不同的列值,然后连接并创建结果列字符串。

答案 2 :(得分:0)

在生产数据库中进行这种处理要小心。有很多问题需要考虑(重复的词是否总是错误的、如何处理标点符号、单词是否仅以空格分隔)。不过,您可以使用递归,如下面的代码段:

-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of every word in its original position.
DECLARE @word varchar(MAX) = 'AUNDH AUNDH CAMP';

WITH Splitter AS
(
    -- Anchor: peel off the first word. When the string contains no space the
    -- whole string is the only word and nothing is left over; without this
    -- guard LEFT(@word, -1) raises an "invalid length parameter" error.
    SELECT 1 AS N,
           CASE WHEN CHARINDEX(' ', @word) > 0
                THEN LEFT(@word, CHARINDEX(' ', @word) - 1)
                ELSE @word
           END AS Word,
           CASE WHEN CHARINDEX(' ', @word) > 0
                THEN SUBSTRING(@word, CHARINDEX(' ', @word) + 1, LEN(@word))
                ELSE NULL
           END AS Rest
    UNION ALL
    -- Recursive step: peel the next word off whatever is left.
    SELECT N + 1,
           CASE WHEN CHARINDEX(' ', Rest) > 0
                THEN LEFT(Rest, CHARINDEX(' ', Rest) - 1)
                ELSE Rest
           END,
           CASE WHEN CHARINDEX(' ', Rest) > 0
                THEN SUBSTRING(Rest, CHARINDEX(' ', Rest) + 1, LEN(Rest))
                ELSE NULL
           END
    FROM Splitter
    WHERE LEN(Rest) > 0
), Numbered AS
(
    -- RowNum = 1 marks the first occurrence of each word.
    -- Empty words come from leading or consecutive spaces and are dropped.
    SELECT N, Word, ROW_NUMBER() OVER (PARTITION BY Word ORDER BY N) AS RowNum
    FROM Splitter
    WHERE Word <> ''
)
-- TYPE + .value() returns the concatenation as plain text, so characters
-- such as & < > are not turned into XML entities.
SELECT STUFF((SELECT ' ' + Word
              FROM Numbered
              WHERE RowNum = 1
              ORDER BY N
              FOR XML PATH(''), TYPE).value('.', 'varchar(max)'), 1, 1, '') AS NoDuplicates
-- One recursion level is used per word; lift the default cap of 100 levels.
OPTION (MAXRECURSION 0);

如果您愿意,可以将其嵌入到函数中。

答案 3 :(得分:0)

使用Numbers表:

 -- Demo: remove repeated words from every row of #test using a Numbers-table
 -- based splitter ([dbo].[SplitStrings_Numbers], defined outside this script
 -- and assumed to return one row per word in a column named item).
 create table #test
(
id varchar(max)
)

insert into #test (id)
select 'a a b'
union all
select 'c c d'

;with cte
as
(
    -- one row per (source string, word); rownum identifies the source string
    select t.id,
           b.item,
           dense_rank() over (order by t.id) as rownum
    from #test t
    cross apply [dbo].[SplitStrings_Numbers](t.id, ' ') b
)
,finalresult
as
(
    -- Re-join the distinct words of each source string with a single space.
    -- The leading separator is removed by stuff(); type/.value() keeps
    -- characters such as & < > from being XML-encoded.
    select stuff((
               select ' ' + c1.item
               from cte c1
               where c1.rownum = c2.rownum
               group by c1.item
               for xml path(''), type
           ).value('.', 'varchar(max)'), 1, 1, '') as finalvalue
    from cte c2
)
-- cte holds one row per word, so collapse the repeated results
select finalvalue
from finalresult
group by finalvalue

答案 4 :(得分:0)

这与@TheGameiswar的答案基本相同,只是缩短了一些,不必要的步骤被排除在外。

-- Demo: remove repeated words from every row of #test with the
-- [dbo].[DelimitedSplit8K] splitter (defined outside this script).
-- NOTE(review): DelimitedSplit8K takes a varchar(8000) argument, so longer
-- id values would presumably be truncated - confirm before using on real data.
create table #test
(
id varchar(max)
)

insert into #test (id)
select 'a a b'
union all
select 'c c d';

select t.id,
  stuff((
    select ' ' + s.item
    from [dbo].[DelimitedSplit8K](t.id, ' ') s
    group by s.item
    -- keep each word at the position of its first occurrence; relies on the
    -- ItemNumber column that DelimitedSplit8K returns next to Item
    order by min(s.ItemNumber)
    -- type/.value() keeps characters such as & < > from being XML-encoded
    for xml path(''), type
  ).value('.', 'varchar(max)'), 1, 1, '') as finalvalue
from #test t

DelimitedSplit8K 是 Jeff Moden 发布在 SQLServerCentral 上的快速字符串拆分函数。您也可以使用其他任何拆分函数。

答案 5 :(得分:0)

-- Removes repeated words from each string in @source, keeping the first
-- occurrence of every word in its original position.

-- Sample input: one string per row.
DECLARE @source TABLE
(
  [str] VARCHAR(MAX)
)

INSERT INTO @source ([str])
VALUES ('address: AUNDH AUNDH CAMP'),
       ('address: AUNDH CAMP AUNDH'),
       ('address: BBB AUNDH address:'),
       ('address: BBB AUNDH CAMP')

-- Input rows numbered 1..N so the loop below can visit them one at a time.
DECLARE @tbl AS TABLE
(
  num   INT,
  [str] VARCHAR(MAX)
)

-- One row per word: num = owning input row, n = position of the word in it.
DECLARE @result AS TABLE
(
  num   INT,
  n     SMALLINT,
  [str] VARCHAR(MAX)
)

INSERT INTO @tbl (num, [str])
SELECT ROW_NUMBER() OVER (ORDER BY [str]), [str]
FROM   @source

DECLARE
  @row   INT = 1,
  @last  INT = (SELECT MAX(num) FROM @tbl),  -- NULL when @source is empty, so the loop is skipped
  @rest  VARCHAR(MAX),                       -- part of the current string not yet split
  @space BIGINT,                             -- position of the next space in @rest (0 = none)
  @n     SMALLINT

WHILE (@row <= @last)
BEGIN

  SET @rest = (SELECT [str] FROM @tbl WHERE num = @row)
  SET @n = 1
  SET @space = CHARINDEX(' ', @rest)

  -- Cut off one word per iteration until no space is left.
  WHILE (@space <> 0)
  BEGIN

    INSERT INTO @result (num, n, [str])
    VALUES (@row, @n, SUBSTRING(@rest, 1, @space - 1))

    SET @rest = SUBSTRING(@rest, @space + 1, LEN(@rest))
    SET @n += 1
    SET @space = CHARINDEX(' ', @rest)

  END

  -- Whatever remains after the last space is the final word.
  INSERT INTO @result (num, n, [str])
  VALUES (@row, @n, @rest)

  SET @row += 1
END

-- result
SELECT SUBSTRING([str], 2, LEN([str])) AS [str]  -- drop the leading separator
FROM   (
         SELECT DISTINCT r_big.num,
                (
                  -- Distinct words of this row, ordered by first occurrence.
                  -- TYPE + .value() returns plain text, so characters such as
                  -- & < > are not turned into XML entities.
                  SELECT ' ' + r_small.[str] AS [text()]
                  FROM   @result r_small
                  WHERE  r_small.num = r_big.num
                  GROUP BY r_small.num, r_small.[str]
                  ORDER BY r_small.num, MIN(r_small.n)
                  FOR XML PATH(''), TYPE
                ).value('.', 'VARCHAR(MAX)') AS [str]
         FROM   @result r_big
       ) x