根据识别的NULL值将数据分组到单独的分区中

时间:2019-01-11 19:04:17

标签: sql sql-server

我正在寻找一种基于NULL值进行分区的方法,期望结果如下面的“GroupNumber”列所示。问题在于:在窗口函数语句中,我的数据集里没有其他可以用来分组的标识符(也就是说,无法直接得出下面的“GroupNumber”列),所以关键是如何构造出这个“GroupNumber”列。当出现NULL值时(按日期DESC排序),是否有办法中断/重置分区?注意:每个分区中可能出现多个NULL值。感谢任何帮助。

方法:

  1. 创建位标志列以表示NULL值。
  2. 使用滚动累计和(按日期DESC排序)来生成这些组。这是一个很好的方法,因为每遇到一个NULL值,“GroupNumber”字段就会自动加一。这样就可以把这个新字段作为分区依据来做汇总计算。

示例设置:

-- Set up the sample temp table. Drop-if-exists keeps the script re-runnable.
IF OBJECT_ID('tempdb..#GroupNULL', 'U') IS NOT NULL
    DROP TABLE #GroupNULL;

CREATE TABLE #GroupNULL
(
    [ID]     INT  NOT NULL,
    [Date]   date NULL,
    [Number] INT  NULL
);

-- One multi-row INSERT instead of ten single-row statements.
-- ISO 8601 date literals ('yyyy-mm-dd') are unambiguous for the DATE type,
-- unlike 'm/d/yyyy', which depends on the session's DATEFORMAT/language setting.
INSERT INTO #GroupNULL (ID, Date, Number)
VALUES
    (1001, '2018-08-12', 35),
    (1001, '2018-08-11', 27),
    (1001, '2018-08-10', 7),
    (1001, '2018-08-09', 18),
    (1001, '2018-08-08', NULL),
    (1001, '2018-08-07', 3),
    (1001, '2018-08-06', 42),
    (1001, '2018-08-05', 16),
    (1001, '2018-08-04', 9),
    (1001, '2018-08-03', NULL);

更多内容:我想将此数据集划分为2个组,第一个NULL值(按日期DESC排序)为该组的第一个值。

2 个答案:

答案 0 :(得分:0)

这是一个可以让您更接近目标的例子。它使用窗口聚合,按查询指定的顺序累计到当前行为止出现过的NULL数量。这适用于较新版本的SQL Server / SQL Azure(我记得是SQL Server 2012及以上)。

-- Set up the demo table. Guard the DROP so the script does not fail
-- on first run when t1 does not exist yet.
IF OBJECT_ID('t1', 'U') IS NOT NULL
    DROP TABLE t1;

CREATE TABLE t1 (col1 INT, col2 INT);

-- One multi-row INSERT instead of six single-row statements.
INSERT INTO t1 (col1, col2)
VALUES
    (1, 1),
    (1, 10),
    (2, NULL),
    (2, 10),
    (3, 2),
    (3, NULL);

-- Flag each row whose col2 is NULL as a group boundary, then take a running
-- total of those flags: the total increments at every NULL, producing a
-- distinct group number per NULL-delimited segment (+1 so numbering starts at 1).
WITH boundary_flags AS
(
    SELECT
        col1,
        col2,
        CASE WHEN col2 IS NULL THEN 1 ELSE 0 END AS IsBoundary
    FROM t1
)
SELECT
    col1,
    col2,
    IsBoundary,
    -- ROWS UNBOUNDED PRECEDING forces a true running total; the default
    -- RANGE frame would lump together peer rows with equal sort keys.
    SUM(IsBoundary) OVER (ORDER BY col1, col2 ROWS UNBOUNDED PRECEDING) + 1 AS GroupNumber
FROM boundary_flags
ORDER BY col1, col2

col1        col2        IsBoundary  GroupNumber
----------- ----------- ----------- -----------
1           1           0           1
1           10          0           1
2           NULL        1           2
2           10          0           2
3           NULL        1           3
3           2           0           3

答案 1 :(得分:0)

设置

-- Set up the sample temp table. Drop-if-exists keeps the script re-runnable.
IF OBJECT_ID('tempdb..#GroupNULL', 'U') IS NOT NULL
    DROP TABLE #GroupNULL;

CREATE TABLE #GroupNULL
(
    [ID]     INT  NOT NULL,
    [Date]   date NULL,
    [Number] INT  NULL
);

-- One multi-row INSERT instead of ten single-row statements.
-- ISO 8601 date literals ('yyyy-mm-dd') are unambiguous for the DATE type,
-- unlike 'm/d/yyyy', which depends on the session's DATEFORMAT/language setting.
INSERT INTO #GroupNULL (ID, Date, Number)
VALUES
    (1001, '2018-08-12', 35),
    (1001, '2018-08-11', 27),
    (1001, '2018-08-10', 7),
    (1001, '2018-08-09', 18),
    (1001, '2018-08-08', NULL),
    (1001, '2018-08-07', 3),
    (1001, '2018-08-06', 42),
    (1001, '2018-08-05', 16),
    (1001, '2018-08-04', 9),
    (1001, '2018-08-03', NULL);

解决方案

-- Mark a row as the start of a group when the PREVIOUS row (date-descending,
-- per ID) had a NULL Number. LAG also returns NULL on the first row of each
-- partition (no predecessor), so the newest row is flagged and opens group 1.
WITH group_starts AS
(
    SELECT
        *,
        CASE
            WHEN LAG(Number) OVER (PARTITION BY ID ORDER BY Date DESC) IS NULL
                THEN 1
            ELSE 0
        END AS [Flagged]
    FROM #GroupNULL
)
SELECT
    group_starts.*,
    -- Running total of the start flags yields the group number; ROWS
    -- UNBOUNDED PRECEDING makes it a strict row-by-row running sum.
    SUM(Flagged) OVER (ORDER BY ID, Date DESC ROWS UNBOUNDED PRECEDING) AS [GroupNumber]
FROM group_starts

ID          Date       Number      Flagged     GroupNumber
----------- ---------- ----------- ----------- -----------
1001        2018-08-12 35          1           1
1001        2018-08-11 27          0           1
1001        2018-08-10 7           0           1
1001        2018-08-09 18          0           1
1001        2018-08-08 NULL        0           1
1001        2018-08-07 3           1           2
1001        2018-08-06 42          0           2
1001        2018-08-05 16          0           2
1001        2018-08-04 9           0           2
1001        2018-08-03 NULL        0           2