我有一张表,同一个人可能有多个 Id:其中一些出现在 Id1 列中,另一些出现在 Id2 列中。我想把属于同一个人的所有 Id 归到同一个组里。
如果 id1 = 10 与 id2 = 20 出现在同一行,就意味着 id1 = 10 的人和 id2 = 20 的人是同一个人。
输入和输出示例:
Id1 Id2
--- ---
10 20
10 30
30 30
10 40
50 70
60 50
70 70
NewId OldId
----- -----
1 10
1 20
1 30
1 40
2 50
2 60
2 70
答案 0 :(得分:2)
对于递归任务,您应该使用递归CTE。
-- Assigns a group number (rn) to every id found in #Tmp (Id1, Id2), where two
-- ids share a group when they are linked through a chain of rows.
-- Output columns: Id1 (the id), FirstId1 (smallest id reached from it),
-- rn (dense rank of FirstId1, i.e. the group number).

-- Maximum recursion level. Ids that are only linked through a chain needing
-- more levels than this are not merged into one group; raise it for such data
-- (beyond 100 levels an OPTION (MAXRECURSION n) hint is also required).
declare @max_lvl int = 5;

with cq (Id1, Id2) as
(
    -- Undirected edge list: every pair in both directions, plus a self-pair
    -- (id, id) for each id so that every id can at least resolve to itself.
    -- UNION already removes duplicates, so no DISTINCT is needed.
    select Id1, Id2 from #Tmp
    union
    select Id2, Id1 from #Tmp
    union
    select Id1, Id1 from #Tmp -- add root from Id1
    union
    select Id2, Id2 from #Tmp -- add root from Id2
), cte (Id1, Id2, lvl)
as (
    -- anchor: ids that are directly paired
    select t.Id1, t.Id2, 0 as lvl
    from cq as t
    union all
    -- step: a neighbour of c.Id2 is also connected to c.Id1
    select t2.Id2, c.Id1, c.lvl + 1 as lvl
    from cq as t2
    inner join cte as c
        on t2.Id1 = c.Id2
    where t2.Id1 != c.Id1        -- do not expand self-pairs
      and c.lvl < @max_lvl       -- stops the walk; the edge list contains cycles
)
select
    Id1,
    min(Id2) as FirstId1,
    dense_rank() over (order by min(Id2)) as rn
from cte
group by Id1
如果你的表中数据排列有序,那么最大递归层级(lvl)的限制和带 != 的条件都是不必要的。
答案 1 :(得分:1)
Anna,这是一个合适的示例吗?这是一个连通分量(connected components)问题。
Id1 Id2
--- ---
10 20
10 30
30 30
10 40
50 70
60 50
70 70
NewId OldId
----- -----
1 10
1 20
1 30
1 40
2 50
2 60
2 70
答案 2 :(得分:1)
我猜这可以用递归 CTE 来完成,不过下面是一个不那么优雅的解决方案。
-- Groups ids that are linked (directly or through other ids) by rows of
-- #Table and numbers the groups 1, 2, 3, ... in order of their smallest id.
-- Result: #NewTable (NewID = group number, OldID = original id).

-- CREATE Temps
CREATE TABLE #Table (id1 INT, id2 INT)
CREATE TABLE #NewTable (NewID INT, OldID INT)
CREATE TABLE #AllIDs (ID INT)

-- Insert Test data
INSERT INTO #Table (id1, id2)
VALUES (10, 20),
       (10, 30),
       (30, 20),
       (10, 40),
       (50, 70),
       (60, 50),
       (70, 70),
       (110, 120),
       (120, 130),
       (140, 130)

-- Assemble all possible OldIDs (UNION removes duplicates).
-- NULLs are skipped: a NULL can never match another id, and it sorts first,
-- so it would otherwise be picked as a seed.
INSERT INTO #AllIDs (ID)
SELECT id1 FROM #Table WHERE id1 IS NOT NULL
UNION
SELECT id2 FROM #Table WHERE id2 IS NOT NULL

DECLARE @NewID INT = 1,
        @RowCnt INT

-- Insert seed OldID: the smallest id starts group 1
INSERT INTO #NewTable ([NewID], OldID)
SELECT TOP (1) @NewID, a.ID
FROM #AllIDs AS a
ORDER BY a.ID

SET @RowCnt = @@ROWCOUNT

-- Outer loop: one pass per group; ends when no unassigned id is left to seed
WHILE @RowCnt > 0
BEGIN
    -- Inner loop: grow the current group until a full pass adds nothing new
    WHILE @RowCnt > 0
    BEGIN
        -- Check for id2 that match current OldID
        INSERT INTO #NewTable ([NewID], OldID)
        SELECT DISTINCT @NewID, t.id2
        FROM #Table AS t
        INNER JOIN #NewTable AS nt
            ON t.id1 = nt.OldID
        WHERE nt.[NewID] = @NewID
          AND t.id2 IS NOT NULL
          AND NOT EXISTS (SELECT 1
                          FROM #NewTable AS seen
                          WHERE seen.[NewID] = @NewID
                            AND seen.OldID = t.id2)

        SET @RowCnt = @@ROWCOUNT

        -- Check for id1 that match current OldID
        INSERT INTO #NewTable ([NewID], OldID)
        SELECT DISTINCT @NewID, t.id1
        FROM #Table AS t
        INNER JOIN #NewTable AS nt
            ON t.id2 = nt.OldID
        WHERE nt.[NewID] = @NewID
          AND t.id1 IS NOT NULL
          AND NOT EXISTS (SELECT 1
                          FROM #NewTable AS seen
                          WHERE seen.[NewID] = @NewID
                            AND seen.OldID = t.id1)

        SET @RowCnt = @RowCnt + @@ROWCOUNT
    END

    SET @NewID = @NewID + 1

    -- Add another seed OldID if any left: smallest id not yet in any group
    INSERT INTO #NewTable ([NewID], OldID)
    SELECT TOP (1) @NewID, a.ID
    FROM #AllIDs AS a
    WHERE NOT EXISTS (SELECT 1
                      FROM #NewTable AS nt
                      WHERE nt.OldID = a.ID)
    ORDER BY a.ID

    SET @RowCnt = @@ROWCOUNT
END

-- Get Results
SELECT [NewID], OldID
FROM #NewTable
ORDER BY [NewID], OldID
答案 3 :(得分:1)
CTE 版本。请注意,我添加了几条数据,用来模拟重复的 ID 和孤立的 ID。
--create test data
declare @table table (Id1 int, Id2 int);
insert into @table (Id1, Id2) values
(10, 20),
(10, 30),
(30, 30),
(10, 40),
(40, 45),
(20, 40),
(50, 70),
(60, 50),
(70, 70),
(80, 80);

--show the input pairs
select Id1, Id2
from @table;

--Group related IDs with a recursive CTE.
--Pairs are treated as undirected links and every ID is walked outwards in
--both directions, so a group is found in one piece even when it has several
--locally smallest IDs (e.g. pairs (10, 30) and (20, 30) form the single group
--{10, 20, 30}). A group is identified by its smallest member.
--Note: the walk enumerates every non-repeating path, which is fine for small
--or sparse data but grows quickly on densely linked data.
declare @grouped table ([NewId] int, OldId int);

with edge_cte as (
    --each non-self pair, in both directions
    select Id1 as FromId, Id2 as ToId
    from @table
    where Id1 <> Id2
    union
    select Id2, Id1
    from @table
    where Id1 <> Id2
), id_cte as (
    --every distinct ID from either column (self-paired and lone IDs included)
    select Id1 as Id
    from @table
    where Id1 is not null
    union
    select Id2
    from @table
    where Id2 is not null
), reach_cte as (
    --anchor: every ID reaches itself
    select i.Id as StartId,
           i.Id as CurrentId,
           cast(',' + cast(i.Id as varchar(11)) + ',' as varchar(max)) as Visited
    from id_cte as i
    union all
    --step to a neighbour that is not on this path yet; the Visited list
    --(",10,20,") is what stops the walk from looping around cycles
    select r.StartId,
           e.ToId,
           r.Visited + cast(e.ToId as varchar(11)) + ','
    from reach_cte as r
    inner join edge_cte as e
        on e.FromId = r.CurrentId
    where charindex(',' + cast(e.ToId as varchar(11)) + ',', r.Visited) = 0
), base_cte as (
    --smallest ID reachable from each ID = identifier of its group
    select StartId as OldId,
           min(CurrentId) as BaseId
    from reach_cte
    group by StartId
)
insert into @grouped ([NewId], OldId)
select dense_rank() over (order by b.BaseId),
       b.OldId
from base_cte as b
--paths never repeat an ID, so recursion is bounded by the number of IDs;
--lift the default limit of 100 levels for long chains
option (maxrecursion 0);

select g.[NewId],
       g.OldId
from @grouped as g
order by g.[NewId], g.OldId;
答案 4 :(得分:1)
从概念上讲,这个问题是:给定一组相连的 ID 对,找出其中的连通分量(connected components),然后为每个组分配一个新 ID。以下实现可以解决该问题:
-- Connected components by iterative merging: every id starts in its own
-- group, then each pair merges the two groups it touches into the one with
-- the smaller group_id. A component ends up with its smallest id as group_id.
CREATE TABLE #pairs (a int, b int)
CREATE TABLE #groups (a int, group_id int)

INSERT INTO #pairs (a, b)
VALUES (1, 2), (3, 4), (5, 6), (5, 7), (3, 9), (8, 10), (11, 12), (1, 3)

-- starting stage - all items belong to their own group
-- (UNION removes ids that occur in several pairs; NULL ids are skipped)
INSERT INTO #groups (a, group_id)
SELECT a, a
FROM #pairs
WHERE a IS NOT NULL
UNION
SELECT b, b
FROM #pairs
WHERE b IS NOT NULL

DECLARE @a INT
DECLARE @b INT
DECLARE @aGroup INT, @bGroup INT, @newGroup INT

-- Work queue of pairs still to be processed. A pair with a NULL side links
-- nothing and could never be removed by the equality DELETE below, so it is
-- left out instead of stalling the loop.
SELECT a, b
INTO #mytemp
FROM #pairs
WHERE a IS NOT NULL
  AND b IS NOT NULL

WHILE EXISTS (SELECT 1 FROM #mytemp)
BEGIN
    -- Take any one pending pair. TOP (1) keeps the row limit local to this
    -- statement, unlike a session-wide SET ROWCOUNT.
    SELECT TOP (1) @a = a, @b = b
    FROM #mytemp

    SELECT @aGroup = group_id FROM #groups WHERE a = @a
    SELECT @bGroup = group_id FROM #groups WHERE a = @b
    SELECT @newGroup = MIN(group_id) FROM #groups WHERE a IN (@a, @b)

    -- update the grouping table with the new group:
    -- every member of either group moves to the smaller group_id
    UPDATE #groups
    SET group_id = @newGroup
    WHERE group_id IN (@aGroup, @bGroup)

    -- the pair (and any duplicate of it) is done
    DELETE FROM #mytemp
    WHERE a = @a
      AND b = @b
END

SELECT a, group_id
FROM #groups
ORDER BY group_id, a

DROP TABLE #mytemp
DROP TABLE #pairs
DROP TABLE #groups
以下是解释:
就执行过程而言,这是两层迭代:不断把组 ID 更新为组内的最小值,时间复杂度为 O(n²)。