在T-sql中查找具有不同值的重复项?

时间:2016-09-21 08:58:13

标签: sql-server sql-server-2008 tsql

我需要找出来自不同 sourceport 系统的重复项。

例如,我有如下表格:

-- Sample data for the question: one row per port report, keyed by id.
declare @table table
(
    id                 int,
    portnumber         int,
    [sourceport]       varchar(50),
    sourcereportedDate datetime
)

-- All fixture rows are loaded with a single multi-row insert.
insert into @table (id, portnumber, [sourceport], sourcereportedDate)
values
    -- 1111: every report comes from 'north'
    (1,  1111, 'north', '2016-08-20 09:44:30.847'),
    (2,  1111, 'north', '2016-08-21 09:44:30.847'),
    (3,  1111, 'north', '2016-08-22 09:44:30.847'),
    -- 2222: reported by both 'north' and 'south'
    (4,  2222, 'north', '2016-08-20 09:44:30.847'),
    (5,  2222, 'north', '2016-08-26 09:44:30.847'),
    (6,  2222, 'south', '2016-08-22 09:44:30.847'),
    -- 3333: reported by both 'south' and 'north'
    (7,  3333, 'south', '2016-08-10 09:44:30.847'),
    (8,  3333, 'north', '2016-08-12 09:44:30.847'),
    -- 4444: a single report
    (9,  4444, 'north', '2016-08-20 09:44:30.847'),
    -- 5555: every report comes from 'south'
    (10, 5555, 'south', '2016-08-21 09:44:30.847'),
    (11, 5555, 'south', '2016-08-27 09:44:30.847'),
    -- 6666: reported by both 'south' and 'north'
    (12, 6666, 'south', '2016-08-10 09:44:30.847'),
    (13, 6666, 'north', '2016-08-21 09:44:30.847'),
    (14, 6666, 'south', '2016-08-09 09:44:30.847')

现在,我想找出重复项:'portnumber' 相同而 'sourceport' 不同的行才算重复。如果 'portnumber' 相同且 'sourceport' 也相同,则不应视为重复。我还需要一个附加列(additional column),其中包含同一 'portnumber' 下 'sourcereportedDate' 日期最新的那一行的 Id。

我想获得如下输出:

(4, 2222, 'north'  , '2016-08-20 09:44:30.847',5)
(5, 2222, 'north'  , '2016-08-26 09:44:30.847','latest')
(6, 2222, 'south'  , '2016-08-22 09:44:30.847',5)
(7, 3333, 'south'  , '2016-08-10 09:44:30.847',8)
(8, 3333, 'north'  , '2016-08-12 09:44:30.847','latest')
(12, 6666, 'south' , '2016-08-10 09:44:30.847',13)
(13, 6666, 'north' , '2016-08-21 09:44:30.847','latest')
(14, 6666, 'south' , '2016-08-09 09:44:30.847',13)

提前致谢。

2 个答案:

答案 0 :(得分:1)

试试这个:

;
with
-- Distinct (portnumber, sourceport) pairs, so repeated reports from the
-- same source count only once when looking for duplicates.
dis as
(
    select distinct portnumber, sourceport 
        from @table
),
-- Port numbers that were reported by more than one sourceport.
dup as
(
    select portnumber 
        from dis 
        group by portnumber 
        having count(1) > 1
),
-- Most recent sourcereportedDate among all rows of each duplicated port number.
mx as
(
    select 
        dup.portnumber,
        max(t.sourcereportedDate) as sourcereportedDate
    from 
        dup
    join
        @table as t
    on
        t.portnumber = dup.portnumber
    group by
        dup.portnumber
),
-- id of the row holding that most recent date. max(t.id) keeps exactly one
-- id per port number when several rows tie on the date; without it every
-- output row of that port number would be repeated once per tied id.
mxi as
(
    select
        mx.portnumber,
        max(t.id) as id
    from
        mx
    join
        @table as t
    on
        t.portnumber = mx.portnumber
    and t.sourcereportedDate = mx.sourcereportedDate
    group by
        mx.portnumber
)
-- Every row of a duplicated port number. The row that carries the most
-- recent date is labelled 'latest'; all other rows show that row's id.
-- mxi only contains duplicated port numbers, so joining to it is the only
-- filter needed.
select 
    t.id,
    t.portnumber,
    t.sourceport,
    t.sourcereportedDate,
    case when t.id = mxi.id 
        then 'latest' 
        else cast(mxi.id as varchar(10)) 
        end as latest
from 
    @table as t
join
    mxi
on
    mxi.portnumber = t.portnumber
order by
    t.id

答案 1 :(得分:1)

请尝试这个(我仍然认为可以进一步优化) -

;with DupWithMaxDate as (
    -- Latest sourcereportedDate per port number, considering only rows for
    -- which another row with the same portnumber but a different sourceport
    -- exists. EXISTS is used instead of a self join so rows are not
    -- multiplied before the aggregate runs.
    select
        a.portnumber,
        sourcereportedDate = max(a.sourcereportedDate)
    from        @table a
    where       exists (
                    select 1
                    from   @table b
                    where  b.portnumber = a.portnumber
                    and    b.sourceport <> a.sourceport
                )
    group by    a.portnumber
),
DupWithMaxID as (
    -- id of the row that carries the latest date. max(a.id) yields exactly
    -- one id per port number even when several rows tie on that date, which
    -- would otherwise repeat every output row once per tied id.
    select
        a.portnumber,
        max_id = max(a.id)
    from        DupWithMaxDate x
    inner join  @table a on a.portnumber = x.portnumber and a.sourcereportedDate = x.sourcereportedDate
    group by    a.portnumber
)
-- All rows of the duplicated port numbers; the latest row is labelled
-- 'Latest', every other row shows the id of the latest row.
select
    a.id,
    a.portnumber,
    a.sourceport,
    a.sourcereportedDate,
    new_column = case when a.id = x.max_id then 'Latest' else convert(varchar(10), x.max_id) end
from        DupWithMaxID x
inner join  @table a on a.portnumber = x.portnumber
order by    a.id

更新了以上查询 -

;with DuplicateWithMaxID as (
    -- Ranks the rows of each port number from newest to oldest, keeping only
    -- rows for which another row with the same portnumber but a different
    -- sourceport exists. EXISTS avoids multiplying rows the way a self join
    -- would. a.id desc is a tie-breaker so that rank 1 is deterministic when
    -- several rows share the newest sourcereportedDate.
    select
        portnumber = a.portnumber,
        max_id = a.id,
        rank_id = row_number() over (
                      partition by a.portnumber
                      order by a.sourcereportedDate desc, a.id desc
                  )
    from        @table a
    where       exists (
                    select 1
                    from   @table b
                    where  b.portnumber = a.portnumber
                    and    b.sourceport <> a.sourceport
                )
)
-- All rows of the duplicated port numbers; the rank-1 row is labelled
-- 'Latest', every other row shows the id of the rank-1 row.
select
    a.id,
    a.portnumber,
    a.sourceport,
    a.sourcereportedDate,
    new_column = case when a.id = x.max_id then 'Latest' else convert(varchar(10), x.max_id) end
from
    DuplicateWithMaxID x
inner join  @table a on a.portnumber = x.portnumber
where
    x.rank_id = 1
order by
    a.id