将所有相似人员收集到一个组

时间:2016-11-24 11:45:32

标签: sql sql-server tsql sql-server-2012

同一个人可能有多个Id,其中一些在Id1列中,另一些在Id2列中。我想把属于同一个人的所有Id归到同一组。

如果id1 = 10与id2 = 20出现在同一行,就意味着id1 = 10的人与id2 = 20的人是同一个人。

输入和输出示例:

输入

Id1     Id2
---     ---
10      20
10      30
30      30
10      40

50      70
60      50
70      70 

输出

NewId   OldId
-----   -----
1       10
1       20
1       30
1       40

2       50
2       60
2       70

5 个答案:

答案 0 :(得分:2)

对于递归任务,您应该使用递归CTE。

-- Links ids of #Tmp that are connected directly or through other ids.
-- Returns one row per id (Id1) with the smallest id found for it (FirstId1)
-- and rn, the dense rank of that smallest id.
with cq (Id2, Id1) as (
    -- Undirected link list: every pair in both directions plus a self-link
    -- for each id. UNION already removes duplicate rows.
    select Id2, Id1 from #Tmp
    union
    select Id1, Id2 from #Tmp
    union
    select Id1, Id1 from #Tmp
    union
    select Id2, Id2 from #Tmp
),
cte (Id1, Id2, lvl) as (
    -- level 0: the direct links
    select link.Id1, link.Id2, 0
    from cq as link
    union all
    -- next level: pair every neighbour of reached.Id2 with reached.Id1
    select hop.Id2, reached.Id1, reached.lvl + 1
    from cq as hop
    inner join cte as reached
        on hop.Id1 = reached.Id2
    where hop.Id1 != reached.Id1
        -- rows at lvl 5 are not expanded further, so ids separated by a
        -- longer chain of links are not brought together
        and reached.lvl < 5
)
select
    Id1,
    min(Id2) as FirstId1,
    dense_rank() over (order by min(Id2)) as rn
from cte
group by Id1
如果你的表中的数据排列有序,那么最大lvl限制和带 != 的条件都是不必要的。

答案 1 :(得分:1)

安娜,这个例子合适吗? 这是一个连通分量(connected components)问题。

输入

Id1     Id2
---     ---
10      20
10      30
30      30
10      40

50      70
60      50
70      70 

输出

NewId   OldId
-----   -----
1       10
1       20
1       30
1       40

2       50
2       60
2       70

答案 2 :(得分:1)

我猜这可以通过递归CTE来完成,但下面是一个不那么优雅的解决方案。

-- Working tables: the input pairs, the NewID -> OldID result, and a list of ids.
CREATE TABLE #Table (id1 INT, id2 INT);
CREATE TABLE #NewTable (NewID INT, OldID INT);
CREATE TABLE #AllIDs (ID INT);

-- Sample pairs; each row links id1 with id2.
INSERT INTO #Table (id1, id2)
VALUES
    (10, 20),
    (10, 30),
    (30, 20),
    (10, 40),

    (50, 70),
    (60, 50),
    (70, 70),
    (110, 120),
    (120, 130),
    (140, 130);

-- Assigns a group number (NewID) to every id in #Table: ids that are linked,
-- directly or through other ids, end up with the same NewID.
-- Result: rows (NewID, OldID) in #NewTable, returned ordered by NewID, OldID.

-- Collect every distinct id that occurs on either side of a pair.
-- NULLs are skipped: a NULL can never be matched to a group, and with
-- NOT IN it used to be seeded first and then block every later seed.
INSERT INTO #AllIDs (ID)
    SELECT id1 FROM #Table WHERE id1 IS NOT NULL
    UNION
    SELECT id2 FROM #Table WHERE id2 IS NOT NULL;

DECLARE @NewID INT = 1,   -- number of the group currently being filled
        @RowCnt INT;      -- rows added by the most recent step(s)

-- Seed the first group with the smallest id.
INSERT INTO #NewTable ([NewID], OldID)
    SELECT TOP (1) @NewID, a.ID
    FROM #AllIDs AS a
    WHERE NOT EXISTS (SELECT 1 FROM #NewTable AS done WHERE done.OldID = a.ID)
    ORDER BY a.ID;

SET @RowCnt = @@ROWCOUNT;

-- Outer loop: one pass per group; ends when no unassigned id is left to seed.
WHILE @RowCnt > 0
BEGIN
    -- Inner loop: grow the current group until a full pass adds nothing.
    WHILE @RowCnt > 0
    BEGIN
        -- Add id2 values paired with an id1 that is already in the group.
        INSERT INTO #NewTable ([NewID], OldID)
            SELECT DISTINCT @NewID, t.id2
            FROM #Table AS t
                INNER JOIN #NewTable AS nt ON t.id1 = nt.OldID
            WHERE nt.[NewID] = @NewID
                AND t.id2 IS NOT NULL
                AND NOT EXISTS (SELECT 1
                                FROM #NewTable AS seen
                                WHERE seen.[NewID] = @NewID
                                    AND seen.OldID = t.id2);

        SET @RowCnt = @@ROWCOUNT;

        -- Add id1 values paired with an id2 that is already in the group.
        INSERT INTO #NewTable ([NewID], OldID)
            SELECT DISTINCT @NewID, t.id1
            FROM #Table AS t
                INNER JOIN #NewTable AS nt ON t.id2 = nt.OldID
            WHERE nt.[NewID] = @NewID
                AND t.id1 IS NOT NULL
                AND NOT EXISTS (SELECT 1
                                FROM #NewTable AS seen
                                WHERE seen.[NewID] = @NewID
                                    AND seen.OldID = t.id1);

        -- Keep looping while either direction found something new.
        SET @RowCnt = @RowCnt + @@ROWCOUNT;
    END;

    SET @NewID = @NewID + 1;

    -- Start the next group from the smallest id not assigned yet, if any.
    INSERT INTO #NewTable ([NewID], OldID)
        SELECT TOP (1) @NewID, a.ID
        FROM #AllIDs AS a
        WHERE NOT EXISTS (SELECT 1 FROM #NewTable AS done WHERE done.OldID = a.ID)
        ORDER BY a.ID;

    SET @RowCnt = @@ROWCOUNT;
END;

-- Get Results
SELECT [NewID], OldID
FROM #NewTable
ORDER BY [NewID], OldID;

答案 3 :(得分:1)

CTE版本。请注意,我添加了一些数据点来模拟重复和单独的ID。

--sample pairs; a few rows pair an ID with itself, e.g. (80, 80)
declare @table table (Id1 int, Id2 int);

insert into @table (Id1, Id2)
values  (10, 20),
        (10, 30),
        (30, 30),
        (10, 40),
        (40, 45),
        (20, 40),
        (50, 70),
        (60, 50),
        (70, 70),
        (80, 80);

--show the input
select  Id1,
        Id2
from    @table;

--Group related IDs with a recursive CTE.
--Pairs are treated as undirected links. Every ID is walked outwards from
--itself, and its group is identified by the smallest ID it can reach.
--Walking only from "root" IDs along min -> max links is not enough: a group
--with more than one root, e.g. (1, 3), (2, 3), would be split in two and
--the shared ID 3 listed twice.
--Assumes Id1 and Id2 are not NULL.
--The walk follows every simple path, so the cost grows quickly on large or
--densely linked data; prefer an iterative approach there.
;with link_cte as (
        --each pair in both directions; union drops duplicate links
        select  Id1 FromId, Id2 ToId
        from    @table
        union
        select  Id2, Id1
        from    @table
), walk_cte as (
        --anchor: every ID reaches itself
        select  distinct
                l.FromId StartId,
                l.FromId ReachedId,
                cast(concat('/', l.FromId, '/') as varchar(max)) VisitedPath
        from    link_cte l
        union all
        --step to a linked ID that is not yet on this walk's path;
        --the path check is what stops cycles and self-pairs from looping
        select  w.StartId,
                l.ToId,
                cast(concat(w.VisitedPath, l.ToId, '/') as varchar(max))
        from    link_cte l
        join    walk_cte w
                on  l.FromId = w.ReachedId
        where   w.VisitedPath not like concat('%/', l.ToId, '/%')
), base_ids_cte as (
        --smallest reachable ID per starting ID
        select  w.StartId OldId, min(w.ReachedId) BaseId
        from    walk_cte w
        group   by w.StartId
)
select  dense_rank() over (order by b.BaseId) [NewId],
        b.OldId
from    base_ids_cte b
order   by b.BaseId, b.OldId
--a walk never revisits an ID, so recursion ends on its own; lift the default cap of 100
option  (maxrecursion 0);

答案 4 :(得分:1)

从概念上讲,这是在给定一组相连的数对的情况下查找连通分量,然后为每个组分配一个新ID。以下实现可行:

-- #pairs holds the input links; #groups maps each number (a) to its group id.
CREATE TABLE #pairs (a INT, b INT);
CREATE TABLE #groups (a INT, group_id INT);

INSERT INTO #pairs (a, b)
VALUES
    (1, 2),
    (3, 4),
    (5, 6),
    (5, 7),
    (3, 9),
    (8, 10),
    (11, 12),
    (1, 3);

-- Initial state: every distinct number is the only member of its own group,
-- and the group id is the number itself.
INSERT INTO #groups (a, group_id)
SELECT p.a, p.a
FROM #pairs AS p
UNION
SELECT p.b, p.b
FROM #pairs AS p;

-- Merge groups pair by pair: for each pair, every number that shares a group
-- with either member is relabelled with the smaller of the two group ids.
-- Rows are picked with TOP (1) instead of SET ROWCOUNT, so the session's
-- ROWCOUNT setting is no longer modified by this script.
DECLARE @a INT;
DECLARE @b INT;
DECLARE @aGroup INT, @bGroup INT, @newGroup INT;

-- Work queue of pairs still to process. Pairs containing NULL are left out:
-- the equality DELETE below could never remove them, so the loop would not end.
SELECT a, b
  INTO #mytemp
  FROM #pairs
 WHERE a IS NOT NULL
   AND b IS NOT NULL;

WHILE EXISTS (SELECT 1 FROM #mytemp)
BEGIN
    -- take one of the remaining pairs
    SELECT TOP (1) @a = a, @b = b
      FROM #mytemp;

    -- current groups of both members, and the smaller of the two ids
    SELECT @aGroup = group_id FROM #groups WHERE a = @a;
    SELECT @bGroup = group_id FROM #groups WHERE a = @b;
    SELECT @newGroup = MIN(group_id) FROM #groups WHERE a IN (@a, @b);

    -- update the grouping table with the new group
    UPDATE #groups
       SET group_id = @newGroup
     WHERE group_id IN (@aGroup, @bGroup);

    -- this pair is done (duplicates of the same pair are removed with it)
    DELETE FROM #mytemp
     WHERE a = @a
       AND b = @b;
END;

SELECT a, group_id
  FROM #groups
 ORDER BY group_id, a;

DROP TABLE #mytemp;
DROP TABLE #pairs;
DROP TABLE #groups;

以下是解释:

  • 最初,每个数字自成一组,组ID就是该数字本身
  • 遍历所有数对
  • 对每一对:
    • 找出这两个数字当前组ID中的最小值,作为新的组ID
    • 把当前组ID与该对中任一数字的组ID相同的所有数字,都更新为新的组ID

就过程而言,这相当于两层迭代,不断把组ID更新为组内的最小值,时间复杂度为 O(n²)。