在SQL Server中将连续键分组为范围

时间:2019-03-17 14:20:40

标签: sql-server-2012

在SQL Server 2012中,我的(简化的)表如下所示:

Key  SubKey Quantity
--------------------
96614   1   0.604800
96615   1   1.920000
96615   2   3.840000
96616   1   1.407600
96617   1   0.453600
96617   2   3.568320
96617   3   2.710260
96618   1   11.520000
96619   1   0.453600
96620   1   7.919100
96620   2   4.082400
96626   1   14.394000
96627   1   9.525600
96627   2   4.762800
96627   3   4.536000
96628   1   2.268000

我的查询需要识别连续的键(子键基本上是不相关的)并将它们分组为多个范围,并适当地对数量进行求和。因此,上面的预期输出将是:

KeyRange    TotalQuantity
-------------------------
96614-96620 38.47968
96626-96628 35.48640

我尝试遵循一些使用窗口函数的示例,但是我认为由于它们满足不同的目的,所以对我来说没有多大意义。这是正确的方法吗?

3 个答案:

答案 0 :(得分:1)

我认为不能直接套用内置的窗口函数,不过它们是我解决方案的一部分。下面的代码先找出每个范围的起点和终点(起点是表中不存在"键值比它小 1"的行的键,终点是表中不存在"键值比它大 1"的行的键),再用 BETWEEN 子句把原表的数据连接到对应的范围上,并按范围分组求和。

-- Collapse runs of consecutive [Key] values into "first-last" labels and
-- total the Quantity of every row whose key falls inside each run.
WITH DistinctKeys AS (
    -- One row per key; SubKey plays no part in finding the runs.
    SELECT DISTINCT [Key]
    FROM ConsKeyAsTable
),
IslandStarts AS (
    -- A key opens a run when no row holds the key immediately below it.
    SELECT
        ROW_NUMBER() OVER (ORDER BY k.[Key]) AS IslandNo,
        k.[Key] AS FirstKey
    FROM DistinctKeys AS k
    WHERE NOT EXISTS (
        SELECT 1
        FROM ConsKeyAsTable AS below
        WHERE below.[Key] = k.[Key] - 1
    )
),
IslandEnds AS (
    -- A key closes a run when no row holds the key immediately above it.
    SELECT
        ROW_NUMBER() OVER (ORDER BY k.[Key]) AS IslandNo,
        k.[Key] AS LastKey
    FROM DistinctKeys AS k
    WHERE NOT EXISTS (
        SELECT 1
        FROM ConsKeyAsTable AS above
        WHERE above.[Key] = k.[Key] + 1
    )
)
-- Starts and ends are numbered in key order, so the n-th start pairs with
-- the n-th end.
SELECT
    CAST(st.FirstKey AS varchar(10)) + '-' + CAST(en.LastKey AS varchar(10)) AS KeyRange,
    SUM(src.Quantity) AS Quantity
FROM IslandStarts AS st
INNER JOIN IslandEnds AS en
    ON en.IslandNo = st.IslandNo
INNER JOIN ConsKeyAsTable AS src
    ON src.[Key] >= st.FirstKey
   AND src.[Key] <= en.LastKey
GROUP BY
    st.FirstKey,
    en.LastKey;

SQL Fiddle 演示:http://sqlfiddle.com/#!18/080fa/31

设置代码

-- Sample table for the query: one row per ([Key], [SubKey]) pair with its quantity.
CREATE TABLE ConsKeyAsTable
(
    [Key]    int   NOT NULL,
    [SubKey] int   NOT NULL,
    Quantity float,
    CONSTRAINT PK PRIMARY KEY CLUSTERED ([Key], [SubKey])
);

-- Sample rows: keys 96614-96620 and 96626-96628 form two runs of consecutive keys.
INSERT INTO ConsKeyAsTable ([Key], [SubKey], Quantity)
VALUES
    (96614, 1, 0.604800),
    (96615, 1, 1.920000),
    (96615, 2, 3.840000),
    (96616, 1, 1.407600),
    (96617, 1, 0.453600),
    (96617, 2, 3.568320),
    (96617, 3, 2.710260),
    (96618, 1, 11.520000),
    (96619, 1, 0.453600),
    (96620, 1, 7.919100),
    (96620, 2, 4.082400),
    (96626, 1, 14.394000),
    (96627, 1, 9.525600),
    (96627, 2, 4.762800),
    (96627, 3, 4.536000),
    (96628, 1, 2.268000);

答案 1 :(得分:1)

结合使用窗口函数和由递归 CTE 生成的序号表(tally),以下代码应该可以工作(并且还能处理只包含单个键的范围;请参见下面的设置 SQL 语句):

-- Groups consecutive pKey values of @t into ranges and totals Quantity per range.
-- Approach: enumerate every integer between the smallest and largest key,
-- left-join the distinct keys onto that tally, and derive a group number
-- that is constant within each run of consecutive keys.

-- Bounds of the key domain; both are NULL when @t is empty.
DECLARE @start INT = (SELECT MIN(pKey) FROM @t);
DECLARE @end INT = (SELECT MAX(pKey) FROM @t);

WITH cte_RangeTally AS (
    -- Recursive tally: one row per integer from @start to @end inclusive.
    SELECT @start AS num
    UNION ALL
    -- "num < @end" instead of "num + 1 <= @end": same rows, but the
    -- comparison itself cannot overflow when @end is the largest INT.
    SELECT num + 1 FROM cte_RangeTally WHERE num < @end),
cte_Group AS (
    SELECT
        T.pKey,
        -- The first ROW_NUMBER follows the tally; the second follows the keys.
        -- Tally rows without a key have a NULL pKey, which sorts first, so they
        -- shift every real key by the same offset. The difference therefore
        -- stays constant inside a run of consecutive keys and changes at every gap.
        ROW_NUMBER() OVER (ORDER BY RT.num) - ROW_NUMBER() OVER (ORDER BY T.pKey) AS grp
    FROM
        cte_RangeTally RT
    LEFT JOIN
        (SELECT pKey
        FROM @t
        GROUP BY pKey) T ON RT.num = T.pKey),
cte_NumRanges AS (
    -- For every existing key, look up the first and last key of its run.
    SELECT
        pKey,
        FIRST_VALUE(pKey) OVER(PARTITION BY grp
                               ORDER BY pKey
                               ROWS BETWEEN UNBOUNDED PRECEDING
                               AND CURRENT ROW) AS FirstNum,
        -- LAST_VALUE needs the frame extended to the end of the partition;
        -- a frame ending at the current row would just return the current key.
        LAST_VALUE(pKey) OVER(PARTITION BY grp
                               ORDER BY pKey
                               ROWS BETWEEN UNBOUNDED PRECEDING
                               AND UNBOUNDED FOLLOWING) AS LastNum
    FROM
        cte_Group
    WHERE
        cte_Group.pKey IS NOT NULL)
SELECT
    CAST(NR.FirstNum AS VARCHAR(10)) + ' - ' + CAST(NR.LastNum AS VARCHAR(10)) AS KeyRange,
    SUM(T1.Quantity) AS TotalQty
FROM
    @t T1
LEFT JOIN
    -- Same rows as the former RIGHT JOIN: every row of @t is kept.
    cte_NumRanges NR ON NR.pKey = T1.pKey
GROUP BY
    NR.FirstNum,
    NR.LastNum
-- The tally recurses once per integer between @start and @end. Without this
-- hint the statement fails as soon as that span exceeds the default limit
-- of 100 recursion levels.
OPTION (MAXRECURSION 0);

假定以下设置代码:

-- Table variable holding the sample data read by the range query.
DECLARE @t TABLE
(
    pKey     INT,
    SubKey   INT,
    Quantity FLOAT
);

-- Sample rows: runs 96614-96620, 96626-96628 and 96632-96633, plus the
-- single-key run 96630.
INSERT INTO @t (pKey, SubKey, Quantity)
VALUES
    (96614, 1, 0.604800),
    (96615, 1, 1.920000),
    (96615, 2, 3.840000),
    (96616, 1, 1.407600),
    (96617, 1, 0.453600),
    (96617, 2, 3.568320),
    (96617, 3, 2.710260),
    (96618, 1, 11.520000),
    (96619, 1, 0.453600),
    (96620, 1, 7.919100),
    (96620, 2, 4.082400),
    (96626, 1, 14.394000),
    (96627, 1, 9.525600),
    (96627, 2, 4.762800),
    (96627, 3, 4.536000),
    (96628, 1, 2.268000),
    (96630, 1, 2.165000),
    (96632, 1, 2.800000),
    (96633, 1, 2.900000);

答案 2 :(得分:0)

(编辑:正如@scrawny所指出的,该解决方案目前不支持奇异范围。)

在看到 @MonkeyPushButton 发布的答案之前,我自己最初的思路并没有成功——我尝试过使用 LAG 和 LEAD 以及其他一些技术,但没能让它跑起来。不过在这个过程中我想到了另一个办法,就是这里发布的这个。我不认为它比 Monkey 的答案"更好",但我想其他人可能会感兴趣。(我完全照搬了他的设置代码,希望他不介意。)

SQL Fiddle 演示:http://sqlfiddle.com/#!18/8e86a/3

-- Sample table for the query: one row per ([Key], [SubKey]) pair with its quantity.
CREATE TABLE MyTable
(
    [Key]    int   NOT NULL,
    [SubKey] int   NOT NULL,
    Quantity float,
    CONSTRAINT PK PRIMARY KEY CLUSTERED ([Key], [SubKey])
);

-- Sample rows: keys 96614-96620 and 96626-96628 form two runs of consecutive keys.
INSERT INTO MyTable ([Key], [SubKey], Quantity)
VALUES
    (96614, 1, 0.604800),
    (96615, 1, 1.920000),
    (96615, 2, 3.840000),
    (96616, 1, 1.407600),
    (96617, 1, 0.453600),
    (96617, 2, 3.568320),
    (96617, 3, 2.710260),
    (96618, 1, 11.520000),
    (96619, 1, 0.453600),
    (96620, 1, 7.919100),
    (96620, 2, 4.082400),
    (96626, 1, 14.394000),
    (96627, 1, 9.525600),
    (96627, 2, 4.762800),
    (96627, 3, 4.536000),
    (96628, 1, 2.268000);

查询中四次引用该表来构造键范围集合:t1 和 t4 用于确定 StartKey,t2 和 t3 用于确定 EndKey。

-- Groups consecutive [Key] values of MyTable into "start-end" ranges and
-- totals Quantity per range.
--   t1 / t4: a key starts a range when no row holds the key one below it.
--   t2 / t3: the range ends at the smallest key at or after the start that
--            has no row holding the key one above it.
WITH cte_KeyRange AS
     (
            SELECT t1.[Key] AS StartKey,
                   (
                          SELECT MIN(t2.[Key])
                          FROM MyTable t2
                          -- ">=" rather than ">": a key with no neighbour on
                          -- either side must be its own end. With ">" such a
                          -- single-key range took the end of the NEXT range
                          -- and absorbed that range's rows.
                          WHERE t2.[Key] >= t1.[Key]
                                 AND NOT EXISTS
                                 (
                                        SELECT 1
                                        FROM MyTable t3
                                        WHERE t3.[Key] = t2.[Key] + 1
                                 )
                   ) AS EndKey
            -- DISTINCT: MyTable has one row per SubKey. Without it a start key
            -- with several sub-keys yields duplicate range rows, and the join
            -- below counts every quantity once per duplicate.
            FROM (SELECT DISTINCT [Key] FROM MyTable) t1
            WHERE NOT EXISTS
                   (
                          SELECT 1
                          FROM MyTable t4
                          WHERE t4.[Key] = t1.[Key] - 1
                   )
     )
SELECT CAST(kr.StartKey AS varchar(10)) + '-' + CAST(kr.EndKey AS varchar(10)) AS KeyRange,
       SUM(mt.Quantity) AS TotalQuantity
FROM cte_KeyRange kr
INNER JOIN MyTable mt ON mt.[Key] BETWEEN kr.StartKey AND kr.EndKey
GROUP BY kr.StartKey, kr.EndKey;