SQL:按组统计满足特定条件的末尾连续 NULL 值

时间:2018-02-01 06:26:52

标签: sql-server tsql

给定数据

ID  X_ST    Y_ST    STATUS_FLAG T_DATE  Last Tran   HOURDIFF
42015   0   2   3   1/1/2017 0:00   1/1/2017 23:49  NULL
42015   0   2   3   1/2/2017 0:00   1/2/2017 23:49  NULL
42015   0   2   3   1/3/2017 0:00   1/3/2017 23:49  NULL
42015   0   2   3   1/4/2017 0:00   1/4/2017 23:49  NULL
42015   0   2   3   1/5/2017 0:00   1/5/2017 23:49  NULL
42015   0   2   3   1/6/2017 0:00   1/6/2017 20:49  NULL
42015   0   3   1   1/6/2017 0:00   1/6/2017 21:46  NULL
42015   1   0   2   1/9/2017 0:00   1/9/2017 23:49  9.7
42015   1   0   2   1/10/2017 0:00  1/10/2017 22:49 11.7
42015   0   2   3   1/10/2017 0:00  1/10/2017 23:49 11.7
42015   1   0   2   1/11/2017 0:00  1/11/2017 22:49 10.5
42015   0   2   3   1/11/2017 0:00  1/11/2017 23:49 10.5
42015   1   0   2   1/12/2017 0:00  1/12/2017 20:49 9.3
42015   0   2   3   1/12/2017 0:00  1/12/2017 23:49 9.3
42015   1   0   2   1/13/2017 0:00  1/13/2017 13:49 1.2
42015   0   2   3   1/13/2017 0:00  1/13/2017 22:49 1.2
42015   0   2   3   1/14/2017 0:00  1/14/2017 23:49 NULL
42015   0   2   3   1/15/2017 0:00  1/15/2017 23:49 NULL
42015   0   2   3   1/16/2017 0:00  1/16/2017 23:49 0.2
42015   0   2   3   1/17/2017 0:00  1/17/2017 23:49 0.7
42015   0   2   3   1/18/2017 0:00  1/18/2017 23:49 NULL
42015   0   2   3   1/19/2017 0:00  1/19/2017 23:49 NULL
42015   0   2   3   1/20/2017 0:00  1/20/2017 23:49 NULL
42015   0   2   3   1/21/2017 0:00  1/21/2017 23:49 NULL
42015   0   2   3   1/22/2017 0:00  1/22/2017 23:49 NULL
42015   0   2   3   1/23/2017 0:00  1/23/2017 20:49 NULL
42015   0   2   3   1/24/2017 0:00  1/24/2017 21:49 NULL
42015   0   3   1   1/24/2017 0:00  1/24/2017 22:34 NULL

所需数据

ID  X_ST    Y_ST    STATUS_FLAG T_DATE  Last Tran   HOURDIFF    DAYS
42015   0   2   3   1/1/2017 0:00   1/1/2017 23:49  NULL    NULL
42015   0   2   3   1/2/2017 0:00   1/2/2017 23:49  NULL    NULL
42015   0   2   3   1/3/2017 0:00   1/3/2017 23:49  NULL    NULL
42015   0   2   3   1/4/2017 0:00   1/4/2017 23:49  NULL    NULL
42015   0   2   3   1/5/2017 0:00   1/5/2017 23:49  NULL    NULL
42015   0   2   3   1/6/2017 0:00   1/6/2017 20:49  NULL    NULL
42015   0   3   1   1/6/2017 0:00   1/6/2017 21:46  NULL    **6**
42015   1   0   2   1/9/2017 0:00   1/9/2017 23:49  9.7    NULL
42015   1   0   2   1/10/2017 0:00  1/10/2017 22:49 11.7    NULL
42015   0   2   3   1/10/2017 0:00  1/10/2017 23:49 11.7    NULL
42015   1   0   2   1/11/2017 0:00  1/11/2017 22:49 10.5    NULL
42015   0   2   3   1/11/2017 0:00  1/11/2017 23:49 10.5    NULL
42015   1   0   2   1/12/2017 0:00  1/12/2017 20:49 9.3     NULL
42015   0   2   3   1/12/2017 0:00  1/12/2017 23:49 9.3     NULL
42015   1   0   2   1/13/2017 0:00  1/13/2017 13:49 1.2     NULL
42015   0   2   3   1/13/2017 0:00  1/13/2017 22:49 1.2     NULL
42015   0   2   3   1/14/2017 0:00  1/14/2017 23:49 NULL    NULL
42015   0   2   3   1/15/2017 0:00  1/15/2017 23:49 NULL    NULL
42015   0   2   3   1/16/2017 0:00  1/16/2017 23:49 0.2     NULL
42015   0   2   3   1/17/2017 0:00  1/17/2017 23:49 0.7     NULL
42015   0   2   3   1/18/2017 0:00  1/18/2017 23:49 NULL    NULL
42015   0   2   3   1/19/2017 0:00  1/19/2017 23:49 NULL    NULL
42015   0   2   3   1/20/2017 0:00  1/20/2017 23:49 NULL    NULL
42015   0   2   3   1/21/2017 0:00  1/21/2017 23:49 NULL    NULL
42015   0   2   3   1/22/2017 0:00  1/22/2017 23:49 NULL    NULL
42015   0   2   3   1/23/2017 0:00  1/23/2017 20:49 NULL    NULL
42015   0   2   3   1/24/2017 0:00  1/24/2017 21:49 NULL    NULL
42015   0   3   1   1/24/2017 0:00  1/24/2017 22:34 NULL    **7**

这里我只关注 STATUS_FLAG = 1 和 3 的行。当 STATUS_FLAG 为 1 时,我需要向前检查此前连续的、STATUS_FLAG = 3 且 HOURDIFF 为 NULL 的记录,然后统计这些 NULL 记录的数量。

所需结果请参见上表中的 DAYS 列。

请在此处找到附件image.enter image description

如能提供任何帮助,将不胜感激。 img1 img2

谢谢 Saumil Shah

1 个答案:

答案 0 :(得分:0)

是的,肯定不是最有效的方法,但使用递归公用表表达式可以解决这个问题。以下是完整的工作示例:

-- Table variable that holds the sample rows used by the queries below.
DECLARE @DataSource TABLE
(
     [ID]          INT
    ,[X_ST]        BIT
    ,[Y_ST]        TINYINT
    ,[STATUS_FLAG] TINYINT       -- the sample rows use the values 1, 2 and 3
    ,[T_DATE]      DATETIME2     -- the sample rows always carry a 0:00 time here
    ,[Last Tran]   DATETIME2
    ,[HOURDIFF]    DECIMAL(9,1)  -- one decimal place; NULL in many sample rows
);

-- Load the sample rows from the question into @DataSource.
-- Date/time literals are written in the ISO 8601 form (YYYY-MM-DDThh:mm:ss) so they
-- are parsed identically under every SET DATEFORMAT / SET LANGUAGE setting.
-- Slash-separated strings such as '1/13/2017 0:00' are interpreted according to the
-- session DATEFORMAT and either fail to convert or swap day and month under DMY.
INSERT INTO @DataSource ([ID], [X_ST], [Y_ST], [STATUS_FLAG], [T_DATE], [Last Tran], [HOURDIFF])
VALUES   (42015, 0, 2, 3, '2017-01-01T00:00:00', '2017-01-01T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-02T00:00:00', '2017-01-02T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-03T00:00:00', '2017-01-03T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-04T00:00:00', '2017-01-04T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-05T00:00:00', '2017-01-05T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-06T00:00:00', '2017-01-06T20:49:00', NULL)
        ,(42015, 0, 3, 1, '2017-01-06T00:00:00', '2017-01-06T21:46:00', NULL)
        ,(42015, 1, 0, 2, '2017-01-09T00:00:00', '2017-01-09T23:49:00', 9.7)
        ,(42015, 1, 0, 2, '2017-01-10T00:00:00', '2017-01-10T22:49:00', 11.7)
        ,(42015, 0, 2, 3, '2017-01-10T00:00:00', '2017-01-10T23:49:00', 11.7)
        ,(42015, 1, 0, 2, '2017-01-11T00:00:00', '2017-01-11T22:49:00', 10.5)
        ,(42015, 0, 2, 3, '2017-01-11T00:00:00', '2017-01-11T23:49:00', 10.5)
        ,(42015, 1, 0, 2, '2017-01-12T00:00:00', '2017-01-12T20:49:00', 9.3)
        ,(42015, 0, 2, 3, '2017-01-12T00:00:00', '2017-01-12T23:49:00', 9.3)
        ,(42015, 1, 0, 2, '2017-01-13T00:00:00', '2017-01-13T13:49:00', 1.2)
        ,(42015, 0, 2, 3, '2017-01-13T00:00:00', '2017-01-13T22:49:00', 1.2)
        ,(42015, 0, 2, 3, '2017-01-14T00:00:00', '2017-01-14T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-15T00:00:00', '2017-01-15T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-16T00:00:00', '2017-01-16T23:49:00', 0.2)
        ,(42015, 0, 2, 3, '2017-01-17T00:00:00', '2017-01-17T23:49:00', 0.7)
        ,(42015, 0, 2, 3, '2017-01-18T00:00:00', '2017-01-18T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-19T00:00:00', '2017-01-19T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-20T00:00:00', '2017-01-20T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-21T00:00:00', '2017-01-21T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-22T00:00:00', '2017-01-22T23:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-23T00:00:00', '2017-01-23T20:49:00', NULL)
        ,(42015, 0, 2, 3, '2017-01-24T00:00:00', '2017-01-24T21:49:00', NULL)
        ,(42015, 0, 3, 1, '2017-01-24T00:00:00', '2017-01-24T22:34:00', NULL);

-- For every STATUS_FLAG = 1 row, report in [Days] the length of the unbroken run of
-- immediately preceding rows that have STATUS_FLAG = 3 and a NULL HOURDIFF.
-- All other rows get NULL in [Days].
-- NOTE(review): rows are numbered across the whole table variable; if it can hold
-- several [ID] values the numbering probably needs PARTITION BY [ID] - confirm.
WITH DataSourceRaw AS
(
    -- Number the rows chronologically so that "previous row" is RowNumber - 1.
    SELECT [ID]
          ,[X_ST]
          ,[Y_ST]
          ,[STATUS_FLAG]
          ,[T_DATE]
          ,[Last Tran]
          ,[HOURDIFF]
          ,ROW_NUMBER() OVER (ORDER BY [T_DATE] ASC, [Last Tran] ASC) AS [RowNumber]
    FROM @DataSource
), RecursiveDataSource AS
(
    -- Anchor: every STATUS_FLAG = 1 row starts its own group (GroupID = its row number).
    SELECT [RowNumber], [RowNumber] AS [GroupID], 0 AS [Level]
    FROM DataSourceRaw
    WHERE [STATUS_FLAG] = 1
    UNION ALL
    -- Step back one row at a time while the previous row still qualifies.
    -- R.[RowNumber] + R.[Level] always equals the anchor's row number, because
    -- RowNumber drops by one each time Level grows by one.
    SELECT C.[RowNumber], R.[RowNumber] + R.[Level], R.[Level] + 1
    FROM RecursiveDataSource R
    INNER JOIN DataSourceRaw C
        ON R.[RowNumber] = C.[RowNumber] + 1
    WHERE C.[STATUS_FLAG] = 3
        AND C.[HOURDIFF] IS NULL
)
SELECT DSR.[ID]
      ,DSR.[X_ST]
      ,DSR.[Y_ST]
      ,DSR.[STATUS_FLAG]
      ,DSR.[T_DATE]
      ,DSR.[Last Tran]
      ,DSR.[HOURDIFF]
      ,DSR.[RowNumber]
       -- The group count includes the anchor row itself, hence the - 1.
      ,CASE WHEN DSR.[STATUS_FLAG] = 1 THEN DS.[Days] - 1 ELSE NULL END AS [Days]
FROM DataSourceRaw DSR
CROSS APPLY
(
    SELECT COUNT(*)
    FROM RecursiveDataSource
    WHERE DSR.[RowNumber] = [GroupID]
) DS ([Days])
ORDER BY DSR.[T_DATE] ASC, DSR.[Last Tran] ASC
-- The default limit of 100 recursion levels would abort the statement as soon as a
-- run is longer than 100 rows. Lifting it is safe here: RowNumber strictly decreases
-- on every step, so the recursion always terminates.
OPTION (MAXRECURSION 0);

enter image description here

解决方案非常简单:

  1. 为每行创建一个行号;我们需要对数据进行排序,并且我们将使用此行ID以在递归部分中执行更快的连接
  2. 创建一个递归CTE - 锚点部分由[STATUS_FLAG] = 1的所有行组成;然后,对于其中每一行,逐行向前取得行号更小、[STATUS_FLAG] = 3且HOURDIFF列为NULL的连续行
  3. 然后获取所有初始数据,并且只对[STATUS_FLAG] = 1的行输出计数结果
  4. 计算每个分组中的行数(再减去锚点行本身,即代码中的 -1)

    几点说明:

    • 在递归CTE中我们只使用部分列
    • 在递归CTE中,我们使用[Level]列推算每一行所属的分组:[RowNumber] + [Level]始终等于锚点行的行号,即[GroupID]

    以下是LEFT JOIN的代码:

    -- Same result as the CROSS APPLY form of this query, but the run lengths are
    -- aggregated once per group and attached with a LEFT JOIN.
    -- For every STATUS_FLAG = 1 row, [Days] is the length of the unbroken run of
    -- immediately preceding rows with STATUS_FLAG = 3 and a NULL HOURDIFF; all other
    -- rows get NULL.
    -- NOTE(review): rows are numbered across the whole table variable; if it can hold
    -- several [ID] values the numbering probably needs PARTITION BY [ID] - confirm.
    WITH DataSourceRaw AS
    (
        -- Number the rows chronologically so that "previous row" is RowNumber - 1.
        SELECT [ID]
              ,[X_ST]
              ,[Y_ST]
              ,[STATUS_FLAG]
              ,[T_DATE]
              ,[Last Tran]
              ,[HOURDIFF]
              ,ROW_NUMBER() OVER (ORDER BY [T_DATE] ASC, [Last Tran] ASC) AS [RowNumber]
        FROM @DataSource
    ), RecursiveDataSource AS
    (
        -- Anchor: every STATUS_FLAG = 1 row starts its own group.
        SELECT [RowNumber], [RowNumber] AS [GroupID], 0 AS [Level]
        FROM DataSourceRaw
        WHERE [STATUS_FLAG] = 1
        UNION ALL
        -- Step back one row at a time while the previous row still qualifies.
        -- R.[RowNumber] + R.[Level] always equals the anchor's row number, because
        -- RowNumber drops by one each time Level grows by one.
        SELECT C.[RowNumber], R.[RowNumber] + R.[Level], R.[Level] + 1
        FROM RecursiveDataSource R
        INNER JOIN DataSourceRaw C
            ON R.[RowNumber] = C.[RowNumber] + 1
        WHERE C.[STATUS_FLAG] = 3
            AND C.[HOURDIFF] IS NULL
    )
    SELECT DSR.[ID]
          ,DSR.[X_ST]
          ,DSR.[Y_ST]
          ,DSR.[STATUS_FLAG]
          ,DSR.[T_DATE]
          ,DSR.[Last Tran]
          ,DSR.[HOURDIFF]
          ,DSR.[RowNumber]
           -- The group count includes the anchor row itself, hence the - 1.
          ,CASE WHEN DSR.[STATUS_FLAG] = 1 THEN DS.[Days] - 1 ELSE NULL END AS [Days]
    FROM DataSourceRaw DSR
    LEFT JOIN
    (
        -- One row per group: the anchor's row number and the size of its group.
        SELECT [GroupID]
              ,COUNT(*)
        FROM RecursiveDataSource
        GROUP BY [GroupID]
    ) DS ([GroupID], [Days])
        ON DSR.[RowNumber] = DS.[GroupID]
    ORDER BY DSR.[T_DATE] ASC, DSR.[Last Tran] ASC
    -- The default limit of 100 recursion levels would abort the statement as soon as
    -- a run is longer than 100 rows. Lifting it is safe here: RowNumber strictly
    -- decreases on every step, so the recursion always terminates.
    OPTION (MAXRECURSION 0);