使用SQL从列单元格中删除重复的单词

时间:2015-10-20 14:51:28

标签: sql sql-server replace character

从列单元格中删除重复的单词,例如:

数据列中某个字段的值如下:'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA'

但我希望“BLACKHEATH”只出现一次,如果还有其他重复出现的条目,则应将其删除,结果如下:

BLACKHEATH COLCHESTER CO2 0AA

我可以通过应用程序代码完成此操作,但我不知道如何在SQL中删除重复的单词。我被要求必须通过SQL来完成。任何帮助将不胜感激。

3 个答案:

答案 0 :(得分:4)

如果单词的顺序无关紧要,你可以很容易地做到这一点:

DECLARE @string VARCHAR(100) = 'BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA';

-- Returns the source string next to a space-separated list of its distinct
-- words. The order of the words in UniqueWords is not guaranteed.
-- NOTE(review): dbo.SplitString is not defined here; this assumes it is a
-- table-valued function that returns one row per token in a string column
-- named column1 - confirm against the splitter you install.
SELECT @string AS Source
    , LTRIM((
            SELECT DISTINCT ' ' + column1 AS [text()]
            FROM dbo.SplitString(@string, ' ')
            -- Consecutive spaces in the input can yield empty tokens; skip them
            -- so they cannot leave a double space inside the result.
            WHERE column1 <> ''
            -- TYPE + .value() returns the concatenation as plain text, so
            -- characters such as & < > are not turned into XML entities.
            FOR XML PATH(''), TYPE
            ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;

这里的思路是什么?

  1. dbo.SplitString函数根据分隔符将字符串拆分为多行(在你的情况下分隔符是空格)。请参阅this article以找到最适合您需求的SplitString函数。
  2. DISTINCT关键字用于去除重复项。
  3. 使用FOR XML PATH('')将它们重新连接成一个字符串。
  4. 结果:

    ╔═══════════════════════════════════════════════╦═══════════════════════════════╗
    ║                    Source                     ║          UniqueWords          ║
    ╠═══════════════════════════════════════════════╬═══════════════════════════════╣
    ║ BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA ║ 0AA BLACKHEATH CO2 COLCHESTER ║
    ╚═══════════════════════════════════════════════╩═══════════════════════════════╝
    

    如果您需要保留原始顺序,则必须创建一个函数,按原始顺序存储各个词项(可能使用ROW_NUMBER()),并为每个词项的各次出现分别编号(用于去除重复项),然后就可以确定每个单词应处的位置。我没有费心去创建它,但对于你的字符串,它应该输出这样的结果:

    ╔═══════════╦═══════════╦════════════╗
    ║ WordOrder ║ TermOrder ║    Term    ║
    ╠═══════════╬═══════════╬════════════╣
    ║         1 ║         1 ║ BLACKHEATH ║
    ║         2 ║         1 ║ 0AA        ║
    ║         3 ║         2 ║ BLACKHEATH ║
    ║         4 ║         1 ║ COLCHESTER ║
    ║         5 ║         1 ║ CO2        ║
    ║         6 ║         2 ║ 0AA        ║
    ╚═══════════╩═══════════╩════════════╝
    

    然后可以在如下查询中使用该结果(@Splitted即上表):

    -- Rebuilds the string from the first occurrence of every term
    -- (TermOrder = 1), keeping the terms in their original word order.
    -- @string and @Splitted (WordOrder, TermOrder, Term) are expected to be
    -- declared and populated before this statement runs.
    SELECT @string AS Source
        , LTRIM((
                SELECT ' ' + Term AS [text()]
                FROM @Splitted
                WHERE TermOrder = 1
                ORDER BY WordOrder
                -- TYPE + .value() returns the concatenation as plain text, so
                -- characters such as & < > are not turned into XML entities.
                FOR XML PATH(''), TYPE
                ).value('.', 'VARCHAR(MAX)')) AS UniqueWords;
    

    输出这个字符串:

    ╔═══════════════════════════════════════════════╦═══════════════════════════════╗
    ║                    Source                     ║          UniqueWords          ║
    ╠═══════════════════════════════════════════════╬═══════════════════════════════╣
    ║ BLACKHEATH 0AA  BLACKHEATH COLCHESTER CO2 0AA ║ BLACKHEATH 0AA COLCHESTER CO2 ║
    ╚═══════════════════════════════════════════════╩═══════════════════════════════╝
    

    P.S. 为什么要求你在SQL中执行此操作?SQL Server并不擅长处理文本数据,因此我建议在应用程序代码中执行此操作,否则可能会影响性能。

答案 1 :(得分:2)

-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of every word in its original position.
-- Result sets: (1) the tokens found with their end positions,
--              (2) the de-duplicated string.
DECLARE @text VARCHAR(MAX) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @i INT = 1;
-- LEN ignores trailing spaces, so the scan always ends on a non-space character.
DECLARE @textLength INT = LEN(@text);
DECLARE @nextCharacter VARCHAR(1);
DECLARE @word VARCHAR(MAX) = '';
DECLARE @newString VARCHAR(MAX);

-- Allow the script to be rerun in a session where a previous run was aborted
-- before reaching its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp;

-- id is the character position at which the word was completed; it is only
-- used to keep the words in their original order.
CREATE TABLE #tmp (id INT NOT NULL, word VARCHAR(MAX) NOT NULL);

-- Walk the text one character at a time and collect the words.
WHILE (@i <= @textLength)
BEGIN
    SET @nextCharacter = SUBSTRING(@text, @i, 1);

    IF (@nextCharacter <> ' ')
        SET @word = @word + @nextCharacter;

    -- A word ends at a space or at the end of the text. Empty words (caused by
    -- consecutive spaces) are skipped so they cannot leave a double space in
    -- the output.
    IF ((@nextCharacter = ' ' OR @i = @textLength) AND LEN(@word) > 0)
    BEGIN
        INSERT INTO #tmp (id, word)
        VALUES (@i, @word);

        SET @word = '';
    END;

    SET @i = @i + 1;
END;

SELECT id, word
FROM #tmp
ORDER BY id;

-- Number the occurrences of each word and keep only the first one. The words
-- are concatenated with FOR XML PATH because the result order of
-- "SELECT @v = @v + col ... ORDER BY" is not guaranteed; TYPE + .value()
-- keeps characters such as & < > from being turned into XML entities.
WITH ranked_words AS
(
    SELECT id
        , word
        , ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS RowNumber
    FROM #tmp
)
SELECT @newString = (
        SELECT ' ' + word AS [text()]
        FROM ranked_words
        WHERE RowNumber = 1
        ORDER BY id
        FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)');

-- No words at all yields NULL from FOR XML; report an empty string instead.
SET @newString = LTRIM(COALESCE(@newString, ''));

SELECT @newString;

DROP TABLE #tmp;

答案 2 :(得分:-2)

-- Removes repeated words from a space-separated string, keeping the first
-- occurrence of every word in its original position.
-- Returns a single result set with one column, FromAddress.
DECLARE @text VARCHAR(MAX) = 'BLACKHEATH 0AA BLACKHEATH COLCHESTER CO2 0AA';
DECLARE @i INT = 1;
-- LEN ignores trailing spaces, so the scan always ends on a non-space character.
DECLARE @textLength INT = LEN(@text);
DECLARE @nextCharacter VARCHAR(1);
DECLARE @word VARCHAR(MAX) = '';
DECLARE @newString VARCHAR(MAX);

-- Allow the script to be rerun in a session where a previous run was aborted
-- before reaching its DROP TABLE.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp;

-- id is the character position at which the word was completed; it is only
-- used to keep the words in their original order.
CREATE TABLE #tmp (id INT NOT NULL, word VARCHAR(MAX) NOT NULL);

-- Walk the text one character at a time and collect the words.
WHILE (@i <= @textLength)
BEGIN
    SET @nextCharacter = SUBSTRING(@text, @i, 1);

    IF (@nextCharacter <> ' ')
        SET @word = @word + @nextCharacter;

    -- A word ends at a space or at the end of the text. Empty words (caused by
    -- consecutive spaces) are skipped so they cannot leave a double space in
    -- the output.
    IF ((@nextCharacter = ' ' OR @i = @textLength) AND LEN(@word) > 0)
    BEGIN
        INSERT INTO #tmp (id, word)
        VALUES (@i, @word);

        SET @word = '';
    END;

    SET @i = @i + 1;
END;

-- Number the occurrences of each word and keep only the first one. The words
-- are concatenated with FOR XML PATH because the result order of
-- "SELECT @v = @v + col ... ORDER BY" is not guaranteed; TYPE + .value()
-- keeps characters such as & < > from being turned into XML entities.
WITH ranked_words AS
(
    SELECT id
        , word
        , ROW_NUMBER() OVER (PARTITION BY word ORDER BY id) AS RowNumber
    FROM #tmp
)
SELECT @newString = (
        SELECT ' ' + word AS [text()]
        FROM ranked_words
        WHERE RowNumber = 1
        ORDER BY id
        FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)');

-- No words at all yields NULL from FOR XML; report an empty string instead.
SET @newString = LTRIM(COALESCE(@newString, ''));

SELECT @newString AS FromAddress;

DROP TABLE #tmp;

请在select子句中删除id。