创建相关列的集群

时间:2016-10-12 14:36:30

标签: tsql

我有一个名为Stores的表格,其中包含列:

StoreCode    NVARCHAR(10),
OldStoreCode NVARCHAR(10)

以下是我的数据示例:

| StoreCode | OldStoreCode |
|-----------|--------------|
| A         | B            |
| B         | A            |
| D         | E            |
| E         | F            |
| M         | K            |
| J         | K            |
| K         | L            |
|-----------|--------------|

我想为相互关联的 Stores 创建集群。相关商店是指 StoreCode 与 OldStoreCode 之间存在(单向的)关联关系。

预期结果表:

| StoreCode | ClusterId |
|-----------|-----------|
| A         | 1         |
| B         | 1         |
| D         | 2         |
| E         | 2         |
| F         | 2         |
| M         | 3         |
| K         | 3         |
| J         | 3         |
| L         | 3         |
|-----------|-----------|

跳数没有上限。例如,可能存在 StoreCode A,其 OldStoreCode 为 B;B 的 OldStoreCode 为 C;C 的 OldStoreCode 为 D,依此类推。

我如何对这样的商店进行集群?

2 个答案:

答案 0 :(得分:0)

这应该这样做:

示例数据:

-- Rebuild the demo table from scratch so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL
    DROP TABLE #Temp1;

CREATE TABLE #Temp1
(
    StoreCode    NVARCHAR(10),
    OldStoreCode NVARCHAR(10)
);

-- One row per relation: StoreCode is linked to OldStoreCode.
INSERT INTO #Temp1 (StoreCode, OldStoreCode)
VALUES ('A', 'B'),
       ('B', 'A'),
       ('D', 'E'),
       ('E', 'F'),
       ('M', 'K'),
       ('J', 'K'),
       ('K', 'L');

QUERY:

-- Assign a ClusterID to every store code found in #Temp1.
-- Two codes belong to the same cluster when they are connected by a chain of
-- StoreCode/OldStoreCode rows of ANY length, followed in either direction.
-- Result: one row per distinct code (StoreCode, ClusterID); clusters are
-- numbered 1..n in the order of their smallest member code.
-- Caveats:
--   * codes must not contain the '|' character (it delimits the visited path);
--   * the walk enumerates simple paths, so it suits sparse, chain-like data
--     rather than densely interlinked codes.
;WITH Nodes -- every distinct code, whether it appears as a new or an old code
    AS (SELECT StoreCode
        FROM   #Temp1
        WHERE  StoreCode IS NOT NULL
        UNION
        SELECT OldStoreCode
        FROM   #Temp1
        WHERE  OldStoreCode IS NOT NULL),
    Edges -- each relation in both directions: direction does not affect membership
    AS (SELECT StoreCode AS FromCode
             , OldStoreCode AS ToCode
        FROM   #Temp1
        WHERE  StoreCode IS NOT NULL
               AND OldStoreCode IS NOT NULL
        UNION
        SELECT OldStoreCode
             , StoreCode
        FROM   #Temp1
        WHERE  StoreCode IS NOT NULL
               AND OldStoreCode IS NOT NULL),
    Walk -- from every code, follow edges to everything reachable
    AS (SELECT n.StoreCode AS RootCode
             , n.StoreCode AS CurrentCode
             , CAST(N'|' + n.StoreCode + N'|' AS NVARCHAR(MAX)) AS VisitedPath
        FROM   Nodes AS n
        UNION ALL
        SELECT w.RootCode
             , e.ToCode
             , CAST(w.VisitedPath + e.ToCode + N'|' AS NVARCHAR(MAX))
        FROM   Walk AS w
             INNER JOIN Edges AS e ON e.FromCode = w.CurrentCode
        -- never revisit a code already on this path; this is what stops cycles such as A <-> B
        WHERE  CHARINDEX(N'|' + e.ToCode + N'|', w.VisitedPath) = 0),
    Clusters -- the smallest reachable code identifies the cluster
    AS (SELECT RootCode AS StoreCode
             , MIN(CurrentCode) AS ClusterKey
        FROM   Walk
        GROUP BY RootCode)

     -- number the clusters consecutively
     SELECT StoreCode
          , ClusterID = DENSE_RANK() OVER (ORDER BY ClusterKey)
     FROM   Clusters
     ORDER BY StoreCode, ClusterID
     OPTION (MAXRECURSION 0); -- chains may exceed the default limit of 100 recursion levels

结果:

enter image description here

答案 1 :(得分:0)

试试这样:

编辑:已根据 OP 在评论中的要求进行了修改

-- Demo data: one row per relation between a store code and its old code.
-- The whole script must run as a single batch because @tbl is a table variable.
DECLARE @tbl TABLE(ID INT IDENTITY, StoreCode VARCHAR(100),OldStoreCode VARCHAR(100));
INSERT INTO @tbl (StoreCode, OldStoreCode) VALUES
 ('A','B'),('B','A'),('D','E'),('E','F'),('M','K'),('J','K'),('K','L');

-- Group all codes that are connected through a chain of rows of ANY length.
-- Result: one row per distinct code (ID, Val), where ID is the smallest row ID
-- of @tbl that belongs to the code's cluster.
-- Caveats:
--   * codes must not contain the '|' character (it delimits the visited path);
--   * the walk enumerates simple paths, so it suits sparse, chain-like data
--     rather than densely interlinked codes.
WITH Nodes AS
(
    -- every distinct code, from either column
    SELECT StoreCode AS Val
    FROM @tbl
    WHERE StoreCode IS NOT NULL
    UNION
    SELECT OldStoreCode
    FROM @tbl
    WHERE OldStoreCode IS NOT NULL
)
,Edges AS
(
    -- each relation in both directions: direction does not affect membership
    SELECT StoreCode AS FromVal
          ,OldStoreCode AS ToVal
    FROM @tbl
    WHERE StoreCode IS NOT NULL
      AND OldStoreCode IS NOT NULL
    UNION
    SELECT OldStoreCode
          ,StoreCode
    FROM @tbl
    WHERE StoreCode IS NOT NULL
      AND OldStoreCode IS NOT NULL
)
,Walk AS
(
    -- from every code, follow edges to everything reachable
    SELECT n.Val AS RootVal
          ,n.Val AS CurrentVal
          ,CAST('|' + n.Val + '|' AS VARCHAR(MAX)) AS VisitedPath
    FROM Nodes AS n
    UNION ALL
    SELECT w.RootVal
          ,e.ToVal
          ,CAST(w.VisitedPath + e.ToVal + '|' AS VARCHAR(MAX))
    FROM Walk AS w
    INNER JOIN Edges AS e ON e.FromVal = w.CurrentVal
    -- never revisit a code already on this path; this is what stops cycles such as A <-> B
    WHERE CHARINDEX('|' + e.ToVal + '|', w.VisitedPath) = 0
)
,CodeClusters AS
(
    -- the smallest reachable code identifies the cluster
    SELECT RootVal AS Val
          ,MIN(CurrentVal) AS ClusterKey
    FROM Walk
    GROUP BY RootVal
)
-- DISTINCT collapses the one-row-per-matching-source-row fan-out of the join
SELECT DISTINCT
       MIN(t.ID) OVER (PARTITION BY cc.ClusterKey) AS ID
      ,cc.Val
FROM CodeClusters AS cc
INNER JOIN @tbl AS t ON cc.Val IN (t.StoreCode, t.OldStoreCode)
ORDER BY ID, Val
OPTION (MAXRECURSION 0); -- chains may exceed the default limit of 100 recursion levels

结果

ID  Val
1   A
1   B
3   D
3   E
3   F
5   J
5   K
5   L
5   M