如何获得这样的重复数据?

时间:2012-01-13 17:12:18

标签: sql-server sql-server-2008 sql-server-2005

我有这样的数据
enter image description here
而类别副本是
1.复制基码如果存在重复,则结果是最后一步
enter image description here
2.如果存在重复,则重复基数1或基数2,结果为最后一步


我需要像结果一样的结果 enter image description here


谢谢,

2 个答案:

答案 0 :(得分:2)

我希望我理解正确:

DECLARE @T1 TABLE
(
    ID INT,
    CODE VARCHAR(50),
    NUMBER1 VARCHAR(50),
    NUMBER2 VARCHAR(50),
    STEP INT
)

INSERT INTO @T1(ID, CODE, NUMBER1, NUMBER2, STEP)
VALUES(1, 'XXA', '001', '009', 1)
INSERT INTO @T1(ID, CODE, NUMBER1, NUMBER2, STEP)
VALUES(2, 'XXB', '001', '008', 2)
INSERT INTO @T1(ID, CODE, NUMBER1, NUMBER2, STEP)
VALUES(3, 'XXC', '002', '009', 3)
INSERT INTO @T1(ID, CODE, NUMBER1, NUMBER2, STEP)
VALUES(4, 'XXA', '002', '008', 4)

SELECT      TAB.ID,
            TAB.CODE,
            TAB.NUMBER1,
            TAB.NUMBER2,
            TAB.STEP,
            COALESCE(CAST(DUP_CODE_T.ID AS VARCHAR(50)) + ',', '') + 
            COALESCE(CAST(DUP_NUM1_T.ID AS VARCHAR(50)) + ',', '') + 
            COALESCE(CAST(DUP_NUM2_T.ID AS VARCHAR(50)), '') AS DUPLICATE_WITH
FROM        @T1 AS TAB OUTER APPLY
            (
                SELECT      TOP 1 DUP_CODE.ID
                FROM        @T1 AS DUP_CODE
                WHERE       DUP_CODE.CODE = TAB.CODE AND
                            DUP_CODE.ID <> TAB.ID
            ) AS DUP_CODE_T OUTER APPLY
            (
                SELECT      TOP 1 DUP_NUM1.ID
                FROM        @T1 AS DUP_NUM1
                WHERE       DUP_NUM1.NUMBER1 = TAB.NUMBER1 AND
                            DUP_NUM1.ID <> TAB.ID
            ) AS DUP_NUM1_T OUTER APPLY
            (
                SELECT      TOP 1 DUP_NUM2.ID
                FROM        @T1 AS DUP_NUM2
                WHERE       DUP_NUM2.NUMBER2 = TAB.NUMBER2 AND
                            DUP_NUM2.ID <> TAB.ID
            ) AS DUP_NUM2_T

您可以添加WHERE子句或TOP 1来过滤结果。

答案 1 :(得分:2)

如果我明白你的要求,我相信这会达到你想要的效果。

它确实产生了指定的结果,至少。

declare @Data table
(
    ID int not null,
    CODE varchar(3) not null,
    NUMBER1 varchar(3) not null,
    NUMBER2 varchar(3) not null,
    STEP int not null
)

insert into @Data values
    (1, 'XXA', '001', '009', 1),
    (2, 'XXB', '001', '008', 2),
    (3, 'XXC', '002', '009', 3),
    (4, 'XXA', '002', '008', 4)

-- this query returns the record with the highest STEP
-- excluding any other records that have the same CODE,
-- NUMBER1, or NUMBER2 and a lower STEP
select
    d.ID,
    d.CODE,
    d.NUMBER1,
    d.NUMBER2,
    d.STEP
from @Data d
where
    -- there does not exist any other record that has
    -- the same CODE, NUMBER1, or NUMBER2 and a higher STEP
    not exists (
        select 1
        from @Data duplicate
        where
            (
                duplicate.CODE = d.CODE
                or duplicate.NUMBER1 = d.NUMBER1
                or duplicate.NUMBER2 = d.NUMBER2
            )
            and (
                duplicate.STEP > d.STEP
                or (
                    duplicate.STEP = d.STEP
                    and duplicate.ID > d.ID
                )
            )
    )


-- this query returns the duplicate records with a lower STEP
select
    d.ID,
    d.CODE,
    d.NUMBER1,
    d.NUMBER2,
    d.STEP,
    HigherStep.ID as HigherStepID
from @Data d
inner join (
    -- join to the duplicate record that has a higher STEP
    select
        ID,
        CODE,
        NUMBER1,
        NUMBER2
    from @Data HighestStep
    where
        -- there does not exist any other record that has
        -- the same CODE, NUMBER1, or NUMBER2 and a higher STEP
        not exists (
            select 1
            from @Data duplicate
            where
                (
                    duplicate.CODE = HighestStep.CODE
                    or duplicate.NUMBER1 = HighestStep.NUMBER1
                    or duplicate.NUMBER2 = HighestStep.NUMBER2
                )
                and (
                    duplicate.STEP > HighestStep.STEP
                    or (
                        duplicate.STEP = HighestStep.STEP
                        and duplicate.ID > HighestStep.ID
                    )
                )
        )
) HigherStep on
    HigherStep.ID <> d.ID -- don't match this record to itself
    and (
        HigherStep.CODE = d.CODE
        or HigherStep.NUMBER1 = d.NUMBER1
        or HigherStep.NUMBER2 = d.NUMBER2
    )

以下是根据Yusan的评论检索具有较低步骤的记录的修订查询。查询的一个问题是它无法捕获那些不是那些具有最高步骤的记录的立即重复的记录。

-- this query returns the duplicate records with a lower STEP
select
    d.ID,
    d.CODE,
    d.NUMBER1,
    d.NUMBER2,
    d.STEP,
    HigherStep.ID as HigherStepID
from @Data d
inner join (
    -- join to the duplicate record that has a higher STEP
    select
        ID,
        CODE,
        NUMBER1,
        NUMBER2,
        STEP
    from @Data
) HigherStep on
    (
        HigherStep.CODE = d.CODE
        or HigherStep.NUMBER1 = d.NUMBER1
        or HigherStep.NUMBER2 = d.NUMBER2
    )
    and (
        HigherStep.STEP > d.STEP
        or (
            HigherStep.STEP = d.STEP
            and HigherStep.ID > d.ID
        )
    )