按顺序分组记录

时间:2017-07-19 03:46:37

标签: sql sql-server

我需要按顺序获取每个组中的记录数。例如,如果我有这样的表

enter image description here

如果下一条记录的开头紧接着上一条记录的结尾(即开头的编号等于上一条结尾的编号加 1),那么它们应该属于同一组。所以我需要类似这样的输出:

enter image description here

是否可以这样做?

1 个答案:

答案 0 :(得分:2)

如果你的字符串都是"一串字符后面跟一个数字"的形式(例如 "XXX001"、"XXXXXXXX001" 或 "ABC1"),那么一种做法是:先用 PATINDEX 提取出数字部分,再用 LAG() 和 LEAD() 找出每个连续序列的开头和结尾;然后用 ROW_NUMBER() 把对应的开头和结尾配对连接起来,并统计开头与结尾之间的行数。

例如:

-- Sample data: every row is a range Start..End whose values have the form
-- <non-digit prefix><number>, e.g. 'sam001'.
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));

INSERT INTO @T (ID, Start, [End])
VALUES
    (1,  'sam001',  'sam005'),
    (2,  'sam006',  'sam008'),
    (3,  'sam009',  'sam014'),
    (4,  'sam018',  'sam024'),
    (5,  'sam030',  'sam035'),
    (6,  'sam036',  'sam040'),
    (7,  'wazza01', 'wazza2'),
    (8,  'wazza03', 'wazza000005'),
    (9,  'wazza09', 'wazza12'),
    (10, 'sam041',  'sam42');

-- Parsed:     split each value at its first digit into a prefix and an INT.
-- Neighbours: previous row's end number / next row's start number, per prefix in ID order.
-- Flagged:    mark the rows that open or close a run of contiguous ranges.
-- RunStarts / RunEnds: number the opening and closing rows per prefix so that
--             the n-th start can be paired with the n-th end.
WITH Parsed AS (
    SELECT
        src.ID,
        src.Start,
        src.[End],
        CAST(SUBSTRING(src.Start, PATINDEX('%[0-9]%', src.Start), LEN(src.Start)) AS INT) AS StartNum,
        CAST(SUBSTRING(src.[End], PATINDEX('%[0-9]%', src.[End]), LEN(src.[End])) AS INT) AS EndNum,
        SUBSTRING(src.Start, 1, PATINDEX('%[0-9]%', src.Start) - 1) AS Prefix
    FROM @T AS src
),
Neighbours AS (
    SELECT
        p.ID,
        p.Start,
        p.[End],
        p.StartNum,
        p.EndNum,
        p.Prefix,
        LAG(p.EndNum)    OVER (PARTITION BY p.Prefix ORDER BY p.ID) AS PrevEndNum,
        LEAD(p.StartNum) OVER (PARTITION BY p.Prefix ORDER BY p.ID) AS NextStartNum
    FROM Parsed AS p
),
Flagged AS (
    SELECT
        n.ID,
        n.Start,
        n.[End],
        n.Prefix,
        -- A run starts on the first row of a prefix or after a gap of more than 1.
        CASE
            WHEN n.PrevEndNum IS NULL THEN 1
            WHEN n.StartNum - n.PrevEndNum > 1 THEN 1
            ELSE 0
        END AS StartOfSequence,
        -- A run ends on the last row of a prefix or before a gap of more than 1.
        CASE
            WHEN n.NextStartNum IS NULL THEN 1
            WHEN n.NextStartNum - n.EndNum > 1 THEN 1
            ELSE 0
        END AS EndOfSequence
    FROM Neighbours AS n
),
RunStarts AS (
    SELECT
        f.ID,
        f.Start,
        f.Prefix,
        ROW_NUMBER() OVER (PARTITION BY f.Prefix ORDER BY f.ID) AS RN
    FROM Flagged AS f
    WHERE f.StartOfSequence = 1
),
RunEnds AS (
    SELECT
        f.ID,
        f.[End],
        f.Prefix,
        ROW_NUMBER() OVER (PARTITION BY f.Prefix ORDER BY f.ID) AS RN
    FROM Flagged AS f
    WHERE f.EndOfSequence = 1
)
SELECT
    s.Start,
    e.[End],
    -- Number of source rows of this prefix lying between the paired start and end rows.
    (
        SELECT COUNT(*)
        FROM Flagged AS f
        WHERE f.Prefix = s.Prefix
          AND f.ID BETWEEN s.ID AND e.ID
    ) AS Total
FROM RunStarts AS s
INNER JOIN RunEnds AS e
    ON  e.Prefix = s.Prefix
    AND e.RN = s.RN;

或者采用 @Pரதீப் 建议的如下修改,在结果中同时输出前缀:

-- Sample data: every row is a range Start..End whose values have the form
-- <non-digit prefix><number>, e.g. 'sam001'.
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT INTO @T (ID, Start, [End]) VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');

-- CTE: split each value at its first digit into Prefix + number, and fetch the
-- previous row's end number within the same prefix (NULL on a prefix's first row).
WITH CTE AS (
    SELECT T.ID,
           T.Start,
           T.[End],
           T.StartNum,
           T.EndNum,
           T.Prefix,
           PrevEndNum = LAG(T.EndNum) OVER (PARTITION BY T.Prefix ORDER BY T.ID)
    FROM (
        SELECT ID,
               Start,
               [End],
               StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
               EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
               Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
        FROM @T
    ) AS T
)
-- One output row per (Prefix, Grp): first Start, last [End], and the row count.
-- DISTINCT collapses the per-row window results down to one row per group.
SELECT DISTINCT
       G.Prefix,
       Start = FIRST_VALUE(G.Start) OVER (PARTITION BY G.Prefix, G.Grp ORDER BY G.ID),
       [End] = FIRST_VALUE(G.[End]) OVER (PARTITION BY G.Prefix, G.Grp ORDER BY G.ID DESC),
       -- Grp restarts at 1 for every prefix, so the count has to be partitioned by
       -- Prefix as well; partitioning by Grp alone would add together equally
       -- numbered groups of different prefixes.
       Total = COUNT(*) OVER (PARTITION BY G.Prefix, G.Grp)
FROM (
    -- Running count of "breaks": a row opens a new group unless its start number
    -- is exactly the previous end number + 1. A NULL PrevEndNum makes the
    -- comparison unknown, so the first row of each prefix also opens a group.
    SELECT Grp = SUM(CASE WHEN C.StartNum = C.PrevEndNum + 1 THEN 0 ELSE 1 END)
                 OVER (PARTITION BY C.Prefix ORDER BY C.ID),
           C.ID,
           C.Prefix,
           C.Start,
           C.[End]
    FROM CTE AS C
) AS G;

注意:如果由于某种原因 ID 的顺序并不可靠,可以根据需要把所有的 ORDER BY ID 替换为 ORDER BY StartNum 或类似的排序依据。

编辑:适用于 SQL Server 2005 的版本(用 OUTER APPLY 代替 LAG()/LEAD()):

-- SQL Server 2005-compatible variant: neighbouring rows are looked up with
-- OUTER APPLY instead of LAG()/LEAD().
-- Sample data: every row is a range Start..End whose values have the form
-- <non-digit prefix><number>, e.g. 'sam001'.
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));

-- A multi-row VALUES list (table value constructor) needs SQL Server 2008 or
-- later, so the rows are loaded with SELECT ... UNION ALL instead.
INSERT INTO @T (ID, Start, [End])
SELECT 1,  'sam001',  'sam005'      UNION ALL
SELECT 2,  'sam006',  'sam008'      UNION ALL
SELECT 3,  'sam009',  'sam014'      UNION ALL
SELECT 4,  'sam018',  'sam024'      UNION ALL
SELECT 5,  'sam030',  'sam035'      UNION ALL
SELECT 6,  'sam036',  'sam040'      UNION ALL
SELECT 7,  'wazza01', 'wazza2'      UNION ALL
SELECT 8,  'wazza03', 'wazza000005' UNION ALL
SELECT 9,  'wazza09', 'wazza12'     UNION ALL
SELECT 10, 'sam041',  'sam42';

-- CTE1: split each value at its first digit into Prefix + number.
WITH CTE1 AS (
    SELECT ID,
           Start,
           [End],
           StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
           EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
           Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
    FROM @T
),
-- CTE2: flag the rows that open or close a run of contiguous ranges.
    CTE2 AS (
    SELECT C.*,
           -- Opens a run: no earlier row for this prefix, or a gap of more than 1.
           StartOfSequence = CASE WHEN PrevRow.EndNum IS NULL OR C.StartNum - PrevRow.EndNum > 1 THEN 1 ELSE 0 END,
           -- Closes a run: no later row for this prefix, or a gap of more than 1.
           EndOfSequence = CASE WHEN NextRow.StartNum IS NULL OR NextRow.StartNum - C.EndNum > 1 THEN 1 ELSE 0 END
    FROM CTE1 AS C
    -- Closest following row of the same prefix (by ID); yields NULL when there is none.
    OUTER APPLY (SELECT TOP 1 StartNum FROM CTE1 WHERE Prefix = C.Prefix AND ID > C.ID ORDER BY ID) NextRow
    -- Closest preceding row of the same prefix (by ID); yields NULL when there is none.
    OUTER APPLY (SELECT TOP 1 EndNum FROM CTE1 WHERE Prefix = C.Prefix AND ID < C.ID ORDER BY ID DESC) PrevRow
)
-- Pair the n-th opening row with the n-th closing row of each prefix and count
-- the source rows of that prefix lying between them.
SELECT C1.Start, C2.[End], Total = (SELECT COUNT(*) FROM CTE2 WHERE ID >= C1.ID AND ID <= C2.ID AND Prefix = C1.Prefix)
FROM (
    SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
    FROM CTE2
    WHERE StartOfSequence = 1
) AS C1
JOIN (
    SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
    FROM CTE2
    WHERE EndOfSequence = 1
) AS C2 ON C1.RN = C2.RN
    AND C1.Prefix = C2.Prefix;