我需要在来自不同 sourceport 系统的数据中找出重复项。
例如,我有如下表格:
declare @table table (id int,portnumber int, [sourceport] varchar(50), sourcereportedDate datetime )
-- Sample rows for @table.
-- The column list is spelled out so each insert does not depend on the
-- physical column order of @table.
-- Timestamps use the ISO 8601 'T' form, which SQL Server interprets the same
-- way regardless of SET LANGUAGE / SET DATEFORMAT (the space-separated form
-- is read as year-day-month for datetime under e.g. British English).
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (1, 1111, 'north', '2016-08-20T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (2, 1111, 'north', '2016-08-21T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (3, 1111, 'north', '2016-08-22T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (4, 2222, 'north', '2016-08-20T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (5, 2222, 'north', '2016-08-26T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (6, 2222, 'south', '2016-08-22T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (7, 3333, 'south', '2016-08-10T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (8, 3333, 'north', '2016-08-12T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (9, 4444, 'north', '2016-08-20T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (10, 5555, 'south', '2016-08-21T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (11, 5555, 'south', '2016-08-27T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (12, 6666, 'south', '2016-08-10T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (13, 6666, 'north', '2016-08-21T09:44:30.847')
insert into @table (id, portnumber, sourceport, sourcereportedDate) values (14, 6666, 'south', '2016-08-09T09:44:30.847')
现在,我希望找出重复项:`portnumber` 相同而 `sourceport` 不同的记录才算重复。如果 `portnumber` 相同且 `sourceport` 也相同,则不应视为重复。
我还需要一个附加列(additional column),其中包含同一 `portnumber` 下 `sourcereportedDate` 日期最大(即最新)的那条记录的 Id;最新的那条记录本身在该列中显示 'latest'(见下方输出)。
我想获得如下输出:
(4, 2222, 'north' , '2016-08-20 09:44:30.847',5)
(5, 2222, 'north' , '2016-08-26 09:44:30.847','latest')
(6, 2222, 'south' , '2016-08-22 09:44:30.847',5)
(7, 3333, 'south' , '2016-08-10 09:44:30.847',8)
(8, 3333, 'north' , '2016-08-12 09:44:30.847','latest')
(12, 6666, 'south' , '2016-08-10 09:44:30.847',13)
(13, 6666, 'north' , '2016-08-21 09:44:30.847','latest')
(14, 6666, 'south' , '2016-08-09 09:44:30.847',13)
提前致谢。
答案 0 :(得分:1)
试试这个:
-- Returns every row of @table whose portnumber occurs under more than one
-- distinct sourceport, plus a `latest` column: 'latest' on the row with the
-- newest sourcereportedDate for that portnumber, otherwise the id of that
-- newest row (as varchar).
;
with
-- one row per distinct (portnumber, sourceport) pair
dis as
(
    select distinct
        portnumber,
        sourceport
    from
        @table
),
-- portnumbers that occur with more than one distinct sourceport
dup as
(
    select
        portnumber
    from
        dis
    group by
        portnumber
    having
        count(1) > 1
),
-- newest sourcereportedDate per duplicated portnumber
mx as
(
    select
        dup.portnumber,
        max(t.sourcereportedDate) as sourcereportedDate
    from
        dup
    inner join
        @table as t
    on
        t.portnumber = dup.portnumber
    group by
        dup.portnumber
),
-- id of the row carrying that newest date, exactly one row per portnumber.
-- max(t.id) collapses ties: if several rows share the newest date, the
-- highest id wins, so the final join cannot fan out and duplicate rows.
-- The left join is kept so a portnumber whose dates are all NULL still
-- produces a row here (with a NULL id) instead of vanishing from the output.
mxi as
(
    select
        mx.portnumber,
        max(t.id) as id
    from
        mx
    left join
        @table as t
    on
        t.portnumber = mx.portnumber
        and t.sourcereportedDate = mx.sourcereportedDate
    group by
        mx.portnumber
)
select
    t.id,
    t.portnumber,
    t.sourceport,
    t.sourcereportedDate,
    case
        when t.id = mxi.id then 'latest'
        else cast(mxi.id as varchar(10))
    end as latest
from
    dup
inner join
    @table as t
on
    t.portnumber = dup.portnumber
inner join
    mxi
on
    mxi.portnumber = t.portnumber
-- explicit ordering so the result is deterministic
order by
    t.portnumber,
    t.id
答案 1 :(得分:1)
请尝试这个(我仍然认为可以进一步优化) -
-- Lists every row of @table whose portnumber is also reported by a different
-- sourceport, with new_column = 'Latest' on the newest row of that
-- portnumber and the id of that newest row (as varchar) on all others.
;with DupWithMaxDate as (
    -- newest sourcereportedDate per portnumber, restricted to rows that have
    -- a counterpart with the same portnumber but a different sourceport.
    -- exists is used instead of a self-join so rows are not multiplied
    -- before aggregation.
    select
        a.portnumber,
        sourcereportedDate = max(a.sourcereportedDate)
    from @table a
    where exists (
        select 1
        from @table b
        where b.portnumber = a.portnumber
          and b.sourceport <> a.sourceport
    )
    group by a.portnumber
),
DupWithMaxID as (
    -- id of the row holding that newest date, one row per portnumber.
    -- max(a.id) breaks ties when several rows share the newest date;
    -- without it every output row of that portnumber would be duplicated.
    select
        a.portnumber,
        max_id = max(a.id)
    from DupWithMaxDate x
    inner join @table a
        on a.portnumber = x.portnumber
       and a.sourcereportedDate = x.sourcereportedDate
    group by a.portnumber
)
select
    a.id,
    a.portnumber,
    a.sourceport,
    a.sourcereportedDate,
    new_column = case
                     when a.id = x.max_id then 'Latest'
                     else convert(varchar(10), x.max_id)
                 end
from DupWithMaxID x
inner join @table a
    on a.portnumber = x.portnumber
-- explicit ordering so the result is deterministic
order by
    a.portnumber,
    a.id
更新了以上查询 -
-- Lists every row of @table whose portnumber is also reported by a different
-- sourceport, with new_column = 'Latest' on the newest row of that
-- portnumber and the id of that newest row (as varchar) on all others.
;with DuplicateWithMaxID as (
    -- Rank the rows of each duplicated portnumber from newest to oldest.
    -- exists replaces the original self-join, which emitted one copy of a
    -- row per differing counterpart before ranking.
    -- a.id desc is a tie-breaker: with the date alone, row_number() picks an
    -- arbitrary row when several share the newest date, so the row labelled
    -- 'Latest' could change between executions.
    select
        portnumber = a.portnumber,
        max_id = a.id,
        rank_id = row_number() over (
                      partition by a.portnumber
                      order by a.sourcereportedDate desc, a.id desc
                  )
    from @table a
    where exists (
        select 1
        from @table b
        where b.portnumber = a.portnumber
          and b.sourceport <> a.sourceport
    )
)
select
    a.id,
    a.portnumber,
    a.sourceport,
    a.sourcereportedDate,
    new_column = case
                     when a.id = x.max_id then 'Latest'
                     else convert(varchar(10), x.max_id)
                 end
from
    DuplicateWithMaxID x
    inner join @table a
        on a.portnumber = x.portnumber
where
    x.rank_id = 1   -- keep only the newest row per portnumber as the reference
-- explicit ordering so the result is deterministic
order by
    a.portnumber,
    a.id