SQL Server:删除重复项

时间:2013-12-22 15:56:58

标签: sql sql-server

我有一张包含300万条记录的表。该表看起来像:

id          phones
----------- -----------------
0           1234; 5897;
1           0121; 7875; 5455;
2           0121; 5455; 7875;
3           999;
4           0121;
5           5455; 0121;

id 为 1、2、4 和 5 的记录是重复的。我只想保留其中 ID 最高且电话字符串最长的那条记录。

所以在我的例子中,运行查询后,我的表应该是:

id          phones
----------- -----------------
0           1234; 5897;
2           0121; 5455; 7875;
3           999;

我将如何做到这一点?

注意:手机字符串中没有空格。

1 个答案:

答案 0 :(得分:4)

这应该是一个很好的起点;

您需要创建2个临时表。

一个保存 id(列 Id),另一个保存电话(列 Id、电话)。因此两者是一对多的关系:一个 id 对应多条电话记录。

然后您需要做的是在这两个表中插入整个原始表

完成此操作后,开始排序/比较结果以重建过滤结果。

下面是一个演示(此代码未经优化,但可以运行):

-- Demo stand-in for the real table: one row per record, with the phone
-- numbers stored as a single semicolon-delimited string.
declare @AllPhones table (id int, phones varchar(max))

-- Ids 0-5 are the rows from the question; ids 6-9 add a second group of
-- overlapping phone lists.
insert into @AllPhones (id, phones) values (0, '1234; 5897;')
insert into @AllPhones (id, phones) values (1, '0121; 7875; 5455;')
insert into @AllPhones (id, phones) values (2, '0121; 5455; 7875;')
insert into @AllPhones (id, phones) values (3, '999;')
insert into @AllPhones (id, phones) values (4, '0121;')
insert into @AllPhones (id, phones) values (5, '5455; 0121;')
insert into @AllPhones (id, phones) values (6, '222;')
insert into @AllPhones (id, phones) values (7, '888;')
insert into @AllPhones (id, phones) values (8, '222; 888;')
insert into @AllPhones (id, phones) values (9, '888; 222;')

-- Show the input so it can be compared with the final result.
select id, phones
from @AllPhones

-- Step 1: split every semicolon-delimited phone list into one row per number.
--   @IdPhone : work queue of record ids; done = 0 until the record is split.
--   @Phone   : (record id, trimmed phone number, 1-based position in the list).
-- NOTE(review): neither table variable has a key, so each pass through the
-- outer loop rescans @IdPhone; expect this to be slow on millions of rows.
declare @IdPhone table (id int, done bit)
declare @Phone table (id int, phone varchar(max), insertOrder int)

insert into @IdPhone (id, done)
select id, 0
from   @AllPhones

declare @Id int
declare @ConcatPhone varchar(max)

declare @idx int
declare @slice varchar(max)
declare @insertOrder int

while exists(select * from @IdPhone where done = 0)
begin
    -- Pick any record that has not been split yet.
    select top 1 @Id = ap.id
               , @ConcatPhone = ap.phones
    from @IdPhone ip inner join @AllPhones ap on ip.id = ap.id
    where ip.done = 0

    set @insertOrder = 1

    -- len(NULL) is NULL and len() of an empty/blank string is 0, so this one
    -- test skips NULL, empty and all-blank remainders.
    while len(@ConcatPhone) > 0
    begin
        set @idx = charindex(';', @ConcatPhone)
        if @idx != 0
            set @slice = left(@ConcatPhone, @idx - 1)
        else
            set @slice = @ConcatPhone          -- no delimiter left: take the rest

        -- Blank tokens (e.g. from ';;') are skipped but still consume a position.
        if len(@slice) > 0
            insert into @Phone (id, phone, insertOrder)
            values (@Id, rtrim(ltrim(@slice)), @insertOrder)

        if @idx = 0 break

        -- Drop the consumed token and its delimiter, counting from the LEFT.
        -- len() ignores trailing blanks, so the previous
        -- right(@ConcatPhone, len(@ConcatPhone) - @idx) cut into the next token
        -- whenever the list ended in spaces. substring() clamps at the end of
        -- the string, so len() is a safe upper bound for the length argument.
        set @ConcatPhone = substring(@ConcatPhone, @idx + 1, len(@ConcatPhone))
        set @insertOrder = @insertOrder + 1
    end

    update @IdPhone
    set done = 1
    where id = @Id
end

-- Step 2: decide which phone rows survive.
--   c    : number of phone rows the record has in @Phone
--   done : 0 until the record has been used as the "keeper" in the loop below
declare @UniquePhone table (id int, c int, phone varchar(max), insertOrder int, done int)

insert into @UniquePhone (id, c, phone, insertOrder, done)
select p.id
     , (select count(pc.id) from @Phone pc where pc.id = p.id)
     , p.phone
     , p.insertOrder
     , 0
from @Phone p
group by p.id, p.phone, p.insertOrder

-- Visit records from the most phone numbers to the fewest (ties: highest id
-- first). For each one, remove from every OTHER record the rows whose phone
-- number the current record also has.
-- NOTE(review): rows are removed per phone number, not per record, so a record
-- that shares only some numbers with the keeper keeps its remaining numbers.
while 1 = 1
begin
    select top 1 @Id = up.id
    from @UniquePhone up
    where up.done = 0
    order by up.c desc
           , up.id desc

    -- No unprocessed record left.
    if @@rowcount = 0 break

    delete victim
    from @UniquePhone victim
    where victim.id <> @Id
      and exists (select *
                  from @UniquePhone keeper
                  where keeper.id = @Id
                    and keeper.phone = victim.phone)

    -- Progress trace: id of the record just processed.
    print @Id

    update @UniquePhone
    set done = 1
    where id = @Id
end

-- Step 3: rebuild one '; '-separated phone list per surviving record id,
-- with the numbers in their original list order (insertOrder).
-- The TYPE directive plus .value() reads the concatenation back as plain text;
-- without it FOR XML PATH('') returns entitized text ('&' -> '&amp;',
-- '<' -> '&lt;', ...). xml methods need SET QUOTED_IDENTIFIER ON.
select FinalTable.id,
       ltrim(rtrim(FinalTable.Phones)) as Phones
from (select ids.id,
             (select up1.phone + '; ' as [text()]
              from @UniquePhone up1
              where up1.id = ids.id
              order by up1.insertOrder
              for xml path(''), type).value('.', 'varchar(max)') as Phones
      from (select distinct id from @UniquePhone) ids) [FinalTable]
order by FinalTable.id   -- deterministic row order for the result