我有一张包含300万条记录的表。该表看起来像:
id phones
----------- -----------------
0 1234; 5897;
1 0121; 7875; 5455;
2 0121; 5455; 7875;
3 999;
4 0121;
5 5455; 0121;
id为1、2、4和5的记录是重复的。我只想保留其中ID最高且电话串最长的那一条记录。
所以在我的例子中,运行查询后,我的表应该是:
id phones
----------- -----------------
0 1234; 5897;
2 0121; 5455; 7875;
3 999;
我将如何做到这一点?
注意:手机字符串中没有空格。
答案 0 :(得分:4)
这应该是一个很好的起点;
您需要创建2个临时表。
一个将保存id(列Id),另一个将保存电话(列Id、电话)。所以这将是一对多的关系。
然后您需要做的是在这两个表中插入整个原始表
完成此操作后,开始排序/比较结果以重建过滤结果。
所以这是一个演示;(此代码未经优化,但可以正常工作)
-- Demo fixture: a table variable standing in for the real phones table.
-- Each row is a record id plus a ';'-separated list of phone numbers.
declare @AllPhones table (id int, phones varchar(max));

-- Sample rows from the question (ids 0-5).
insert into @AllPhones (id, phones) values (0, '1234; 5897;');
insert into @AllPhones (id, phones) values (1, '0121; 7875; 5455;');
insert into @AllPhones (id, phones) values (2, '0121; 5455; 7875;');
insert into @AllPhones (id, phones) values (3, '999;');
insert into @AllPhones (id, phones) values (4, '0121;');
insert into @AllPhones (id, phones) values (5, '5455; 0121;');

-- Extra rows (ids 6-9) added for the demo.
insert into @AllPhones (id, phones) values (6, '222;');
insert into @AllPhones (id, phones) values (7, '888;');
insert into @AllPhones (id, phones) values (8, '222; 888;');
insert into @AllPhones (id, phones) values (9, '888; 222;');

-- Show the raw input before any processing.
select id, phones
from @AllPhones;
-- Step 1: split every ';'-separated phone list in @AllPhones into one row per
-- phone number, stored in @Phone.

-- Work queue of record ids; done = 1 once that record's list has been split.
declare @IdPhone table (id int, done bit)
-- One row per individual phone. insertOrder is the position of the token in
-- the original list, starting at 1 (empty tokens still consume a position).
declare @Phone table (id int, phone varchar(max), insertOrder int)

insert into @IdPhone (id, done)
select id, 0
from @AllPhones

declare @Id int                       -- record currently being processed
declare @ConcatPhone varchar(max)     -- not-yet-consumed remainder of the phone list
declare @idx int                      -- position of the next ';' (0 = none left)
declare @slice varchar(max)           -- current token, before trimming
declare @insertOrder int              -- position of the current token in the list

while exists (select * from @IdPhone where done = 0)
begin
    -- Take the next unprocessed record; ORDER BY makes the processing order deterministic.
    select top 1 @Id = ap.id
               , @ConcatPhone = ap.phones
    from @IdPhone ip
    inner join @AllPhones ap on ip.id = ap.id
    where ip.done = 0
    order by ip.id

    set @insertOrder = 1

    -- len(NULL) is NULL and len() ignores trailing blanks, so NULL, empty and
    -- blank-only remainders all end the loop.
    while len(@ConcatPhone) > 0
    begin
        set @idx = charindex(';', @ConcatPhone)

        if @idx = 0
        begin
            -- No separator left: the whole remainder is the last token.
            set @slice = @ConcatPhone
            set @ConcatPhone = ''
        end
        else
        begin
            set @slice = left(@ConcatPhone, @idx - 1)
            -- Drop the consumed token and its ';'. substring() is used instead of
            -- right(..., len(...) - @idx): len() ignores trailing spaces, which made
            -- the old expression cut too much and lose a final token that was
            -- followed by trailing blanks.
            set @ConcatPhone = substring(@ConcatPhone, @idx + 1, datalength(@ConcatPhone))
        end

        -- Skip empty / blank-only tokens (e.g. the one after a trailing ';').
        if len(@slice) > 0
            insert into @Phone (id, phone, insertOrder)
            values (@Id, rtrim(ltrim(@slice)), @insertOrder)

        set @insertOrder = @insertOrder + 1
    end

    update @IdPhone
    set done = 1
    where id = @Id
end
-- Step 2: remove phone numbers that are already covered by a "better" record.
-- Reads @Phone and reuses the @Id variable declared earlier in the script.

-- Working copy of the split phones.
--   c    = number of phone rows the record had in @Phone when copied
--   done = 1 once the record has been used as the winner in the loop below
declare @UniquePhone table (id int, c int, phone varchar(max), insertOrder int, done bit)

-- The window count replaces a per-row correlated count subquery. @Phone holds
-- at most one row per (id, insertOrder), so no GROUP BY is needed to collapse rows.
insert into @UniquePhone (id, c, phone, insertOrder, done)
select p.id
     , count(*) over (partition by p.id)
     , p.phone
     , p.insertOrder
     , 0
from @Phone p

while exists (select * from @UniquePhone where done = 0)
begin
    -- Winner: the unprocessed record with the most phones; ties go to the highest id.
    select top 1 @Id = up.id
    from @UniquePhone up
    where up.done = 0
    order by up.c desc
           , up.id desc

    -- Delete the winner's phone numbers from every other record. A record whose
    -- phones are all covered by the winner disappears completely.
    -- NOTE(review): a record that only partially overlaps keeps its remaining
    -- phones, so its rebuilt phone list will be shorter than the original;
    -- confirm this is the intended handling of partial overlaps.
    delete from @UniquePhone
    where id <> @Id
      and phone in (select pp.phone from @UniquePhone pp where pp.id = @Id)

    update @UniquePhone
    set done = 1
    where id = @Id
end
-- Step 3: rebuild one '; '-separated phone list per surviving id from @UniquePhone.
select FinalTable.id,
       ltrim(rtrim(FinalTable.Phones)) as Phones   -- drops the blank after the last ';'
from (select ids.id,
             -- Concatenate the phones in their original list order. TYPE + .value()
             -- returns the text un-escaped; plain FOR XML PATH('') would turn
             -- characters such as & < > into XML entities.
             (select up1.phone + '; ' as [text()]
              from @UniquePhone up1
              where up1.id = ids.id
              order by up1.insertOrder
              for xml path(''), type).value('.', 'varchar(max)') as Phones
      from (select distinct id from @UniquePhone) ids) [FinalTable]
order by FinalTable.id   -- deterministic output order