T-SQL - 分组/拆分(倍增)记录

时间:2016-04-12 14:34:42

标签: sql sql-server tsql sql-server-2012

来源日期:

-- Sample data for the question: one row per date range, plus the maximum
-- number of days (GroupBy) a single output record is allowed to span.
CREATE TABLE #Temp (
    ID        INT IDENTITY(1,1) PRIMARY KEY,
    BeginDate DATETIME,
    EndDate   DATETIME,
    GroupBy   INT
);

-- Explicit column list (ID is generated by IDENTITY) and an explicit sort key:
-- the original relied on the implicit sort performed by UNION to decide which
-- row received which ID. Ordering the INSERT ... SELECT by seq makes the IDs
-- match the result shown below (1 = 06-05..06-08, 2 = 06-05..06-12, 3 = 10-22..10-31).
INSERT INTO #Temp (BeginDate, EndDate, GroupBy)
SELECT v.BeginDate, v.EndDate, v.GroupBy
FROM (VALUES
        (1, '2015-06-05 00:00:00.000', '2015-06-08 00:00:00.000', 7),
        (2, '2015-06-05 00:00:00.000', '2015-06-12 00:00:00.000', 7),
        (3, '2015-10-22 00:00:00.000', '2015-10-31 00:00:00.000', 7)
     ) AS v (seq, BeginDate, EndDate, GroupBy)
ORDER BY v.seq;

-- TotalDays = number of day boundaries crossed between BeginDate and EndDate.
SELECT
    ID,
    BeginDate,
    EndDate,
    GroupBy,
    DATEDIFF(DAY, BeginDate, EndDate) AS TotalDays
FROM #Temp
ORDER BY ID;

-- Clean-up. The later queries in this file read #Temp, so run them before this.
DROP TABLE #Temp;

ID  BeginDate       EndDate         GroupBy     TotalDays
1   6/5/15 0:00     6/8/15 0:00     7           3
2   6/5/15 0:00     6/12/15 0:00    7           7
3   10/22/15 0:00   10/31/15 0:00   7           9

期望输出:

ID  BeginDate       EndDate         GroupBy TotalDays   GroupCnt    GroupNum
1   6/5/15 0:00     6/8/15 0:00     7       3           1           1
2   6/5/15 0:00     6/12/15 0:00    7       7           1           1
3   10/22/15 0:00   10/29/15 0:00   7       9           2           1
3   10/29/15 0:00   10/31/15 0:00   7       9           2           2

目标:

根据 ID/BeginDate/EndDate 对记录进行分组。基于 GroupBy(允许的天数)和 TotalDays(起止日期的天数差):如果 GroupBy >= TotalDays,则该组保持为单条记录;否则将该组记录拆分为多条(每 GroupBy 天 1 条记录),直到覆盖全部 TotalDays。

如果表述令人困惑,请见谅。基本上,在上面的例子中,每个组(ID/BeginDate/EndDate)里的每条记录,其 Begin/End 日期之间的天数差都应不超过 7 天(即 GroupBy)。

如果天数差异超过7天,则创建另一条记录(每隔7天差异)。

因此,由于前两个记录的天差为7天或更短,因此只有一个记录。

第3条记录的天数差为 9 (7 + 2)。因此,应该有2条记录(前7天为第1条,其余2天为第2条)。

GroupCnt = 应用上述规则后,该组拆分得到的记录总数。

GroupNum基本上是该组的row number

每个记录的GroupBy#可以不同。数据集非常庞大,因此性能非常重要。

我发现的一个规律与 GroupBy 和天数差之间的模数(取余)有关。

当 GroupBy 值 < 天数差时,模数总是小于 GroupBy。当 GroupBy 值 = 天数差时,模数始终为 0。当 GroupBy 值 > 天数差时,模数总是等于 GroupBy。我不确定是否可以(以及如何)利用这一点来对记录进行分组/拆分以满足要求。

-- Exploratory query: for each range show the day span, the remainder of
-- GroupBy divided by that span, and the unchanged dates for ranges that
-- already fit in one group (NewBeginDate/NewEndDate stay NULL otherwise).
--
-- Changes from the first draft:
--   * DISTINCT removed: ID is the primary key of #Temp, so rows are already unique.
--   * TotalDays is computed once via CROSS APPLY instead of repeating DATEDIFF.
--   * NULLIF guards the modulus: a range that starts and ends on the same day
--     has TotalDays = 0 and would otherwise raise a divide-by-zero error;
--     Modulus is NULL for such rows.
SELECT
    t.ID,
    t.BeginDate,
    t.EndDate,
    t.GroupBy,
    span.TotalDays,
    CAST(t.GroupBy AS DECIMAL(18,6))
        % NULLIF(CAST(span.TotalDays AS DECIMAL(18,6)), 0)          AS Modulus,
    CASE WHEN span.TotalDays <= t.GroupBy THEN t.BeginDate END      AS NewBeginDate,
    CASE WHEN span.TotalDays <= t.GroupBy THEN t.EndDate END        AS NewEndDate
FROM #Temp AS t
CROSS APPLY (SELECT DATEDIFF(DAY, t.BeginDate, t.EndDate) AS TotalDays) AS span

更新: 之前忘记说明:当记录被拆分成多条时,开始/结束日期也要相应改变。换句话说,每条拆分后记录的开始/结束日期应反映 GroupBy 的跨度——期望输出中的第3和第4行更清楚地展示了我的意思。此外,GroupCnt / GroupNum 的计算不如记录的分组/拆分本身重要。

2 个答案:

答案 0 :(得分:3)

你可以使用递归CTE做这样的事情。

-- Split every #Temp row into consecutive segments of at most GroupBy days.
-- The recursive CTE emits one row per segment (GroupNum = 1, 2, ...) while
-- carrying the original dates; the outer query converts the segment number
-- into that segment's own begin/end dates.
;WITH cte AS (
    -- Anchor member: every source row is segment 1.
    SELECT  ID,
            BeginDate,
            EndDate,
            GroupBy,
            DATEDIFF(DAY, BeginDate, EndDate) AS TotalDays,
            1 AS GroupNum
    FROM    #Temp
    UNION ALL
    -- Recursive member: add a segment while the segments so far do not cover
    -- TotalDays. The GroupBy > 0 guard matters because MAXRECURSION 0 removes
    -- the recursion limit: with GroupBy <= 0 the coverage test never becomes
    -- false and the query would never finish.
    SELECT  ID,
            BeginDate,
            EndDate,
            GroupBy,
            TotalDays,
            GroupNum + 1
    FROM    cte
    WHERE   GroupBy > 0
      AND   GroupNum * GroupBy < TotalDays
)
SELECT  ID,
        -- Segment n starts (n - 1) * GroupBy days after the original begin date.
        BeginDate = CASE WHEN GroupNum = 1 THEN BeginDate
                         ELSE DATEADD(DAY, GroupBy * (GroupNum - 1), BeginDate)
                         END ,
        -- Segment n ends n * GroupBy days after the original begin date,
        -- capped at the original end date for the last (partial) segment.
        EndDate   = CASE WHEN GroupBy <= 0 THEN EndDate      -- cannot be split: keep the whole range
                         WHEN TotalDays <= GroupBy THEN EndDate
                         WHEN DATEADD(DAY, GroupBy * GroupNum, BeginDate) > EndDate THEN EndDate
                         ELSE DATEADD(DAY, GroupBy * GroupNum, BeginDate)
                         END ,
        GroupBy,
        TotalDays,
        -- Number of segments produced for this source row.
        COUNT(*) OVER (PARTITION BY ID) AS GroupCnt,
        GroupNum
FROM    cte
ORDER BY ID, GroupNum
OPTION (MAXRECURSION 0)  -- a row may need more than the default 100 segments

cte构建了一个像这样的记录集。

ID          BeginDate               EndDate                 GroupBy     TotalDays   GroupNum
----------- ----------------------- ----------------------- ----------- ----------- -----------
1           2015-06-05 00:00:00.000 2015-06-08 00:00:00.000 7           3           1
2           2015-06-05 00:00:00.000 2015-06-12 00:00:00.000 7           7           1
3           2015-10-22 00:00:00.000 2015-10-31 00:00:00.000 7           9           1
3           2015-10-22 00:00:00.000 2015-10-31 00:00:00.000 7           9           2

然后你只需在这个记录集的基础上,使用一些 CASE 表达式来确定每条记录的开始和结束日期。

最终你应该得到如下结果:

ID          BeginDate               EndDate                 GroupBy     TotalDays   GroupCnt    GroupNum
----------- ----------------------- ----------------------- ----------- ----------- ----------- -----------
1           2015-06-05 00:00:00.000 2015-06-08 00:00:00.000 7           3           1           1
2           2015-06-05 00:00:00.000 2015-06-12 00:00:00.000 7           7           1           1
3           2015-10-22 00:00:00.000 2015-10-29 00:00:00.000 7           9           2           1
3           2015-10-29 00:00:00.000 2015-10-31 00:00:00.000 7           9           2           2

由于您使用的是SQL 2012,因此您还可以在最终查询中使用LAG和LEAD函数。

-- Same segment split as the CASE-based query, but the segment dates are
-- derived with LAG/LEAD (SQL Server 2012+): a segment that has a predecessor
-- starts at an offset from the begin date, and a segment that has a successor
-- ends at an offset from the begin date; otherwise the original date is kept.
;WITH cte AS (
    -- Anchor member: every source row is segment 1.
    SELECT  ID,
            BeginDate,
            EndDate,
            GroupBy,
            DATEDIFF(DAY, BeginDate, EndDate) AS TotalDays,
            1 AS GroupNum
    FROM    #Temp
    UNION ALL
    -- Recursive member: add a segment while the segments so far do not cover
    -- TotalDays. GroupBy > 0 is required for termination because
    -- MAXRECURSION 0 disables the recursion limit.
    SELECT  ID,
            BeginDate,
            EndDate,
            GroupBy,
            TotalDays,
            GroupNum + 1
    FROM    cte
    WHERE   GroupBy > 0
      AND   GroupNum * GroupBy < TotalDays
)

SELECT  ID,
        -- LAG is NULL on the first segment, so COALESCE falls back to BeginDate.
        -- DATEADD replaces "datetime + int", which is only defined for the
        -- legacy datetime types and fails for date/datetime2 columns.
        BeginDate = COALESCE(
                        DATEADD(DAY, GroupBy * (GroupNum - 1),
                                LAG(BeginDate) OVER (PARTITION BY ID ORDER BY GroupNum)),
                        BeginDate),
        -- LEAD is NULL on the last segment, so COALESCE falls back to EndDate.
        EndDate   = COALESCE(
                        DATEADD(DAY, GroupBy * GroupNum,
                                LEAD(BeginDate) OVER (PARTITION BY ID ORDER BY GroupNum)),
                        EndDate),
        GroupBy,
        TotalDays,
        -- Number of segments produced for this source row.
        COUNT(*) OVER (PARTITION BY ID) AS GroupCnt,
        GroupNum
FROM    cte
ORDER BY ID, GroupNum
OPTION (MAXRECURSION 0)  -- a row may need more than the default 100 segments

答案 1 :(得分:1)

-- Numbers (tally) table: one row per zero-based segment index. The primary key
-- gives the range join below an index to seek on.
CREATE TABLE dim_number (id INT NOT NULL PRIMARY KEY);

-- Populate 0..9999 by cross-joining the ten digits four times. The original
-- VALUES ((0), (1), (2), (3)) described ONE row with four columns, which does
-- not fit a one-column table. A source row needing more than 10,000 segments
-- would be silently truncated, so extend the range if the data requires it.
WITH digit (n) AS (
    SELECT d.n
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS d (n)
)
INSERT INTO dim_number (id)
SELECT ones.n + 10 * tens.n + 100 * hundreds.n + 1000 * thousands.n
FROM       digit AS ones
CROSS JOIN digit AS tens
CROSS JOIN digit AS hundreds
CROSS JOIN digit AS thousands;

-- Split every #Temp row into consecutive segments of at most GroupBy days by
-- joining it to the segment indexes 0..last_part.
SELECT
    t.ID,
    CASE WHEN n.id = 0
         THEN t.BeginDate
         ELSE DATEADD(DAY,  n.id      * t.GroupBy, t.BeginDate)
    END                                                             AS BeginDate,
    -- The last segment is capped at the original end date.
    CASE WHEN n.id = parts.last_part
         THEN t.EndDate
         ELSE DATEADD(DAY, (n.id + 1) * t.GroupBy, t.BeginDate)
    END                                                             AS EndDate,
    t.GroupBy                                                       AS GroupBy,
    span.TotalDays                                                  AS TotalDays,
    parts.last_part + 1                                             AS GroupCnt,
    n.id + 1                                                        AS GroupNum
FROM
    #Temp AS t
CROSS APPLY
    (SELECT DATEDIFF(DAY, t.BeginDate, t.EndDate) AS TotalDays)     AS span
CROSS APPLY
    -- Index of the last segment. (TotalDays - 1) / GroupBy instead of
    -- TotalDays / GroupBy: when TotalDays is an exact multiple of GroupBy
    -- (e.g. 7 days in groups of 7) plain division yields one extra,
    -- zero-length segment. Rows that fit in one group, and rows whose GroupBy
    -- is not positive (which would divide by zero or step backwards), are
    -- returned as a single unsplit segment.
    (SELECT CASE WHEN t.GroupBy > 0 AND span.TotalDays > t.GroupBy
                 THEN (span.TotalDays - 1) / t.GroupBy
                 ELSE 0
            END AS last_part)                                       AS parts
INNER JOIN
    dim_number AS n
        ON  n.id >= 0
        AND n.id <= parts.last_part
ORDER BY
    t.ID,
    n.id;