创建满足给定条件的连续日子组

时间:2012-02-14 10:04:23

标签: sql sql-server-2008

我在SQL Server中拥有以下数据结构:

ID  Date        Allocation
 1, 2012-01-01, 0
 2, 2012-01-02, 2
 3, 2012-01-03, 0
 4, 2012-01-04, 0
 5, 2012-01-05, 0
 6, 2012-01-06, 5

我需要做的是获取Allocation = 0的所有连续日期,并采用以下形式:

Start Date    End Date     DayCount
2012-01-01    2012-01-01   1
2012-01-03    2012-01-05   3

是否可以在SQL中实现这一点?如果可以,该怎么做?

6 个答案:

答案 0 :(得分:3)

以下是一种方法。这个解决方案的要点是

  • 使用CTE获取Allocation = 0的所有连续开始日期和结束日期的列表
  • 使用ROW_NUMBER窗口函数,分别按开始日期和结束日期分区来分配行号。
  • 仅选择两个行号都等于1的记录。
  • 使用DATEDIFF计算DayCount

SQL声明

-- Report every maximal run of consecutive Allocation = 0 days as
-- ([Start Date], [End Date], [DayCount]).
;WITH r AS (
  -- Anchor: every zero-allocation day starts out as a one-day range.
  SELECT  Date AS StartDate, Date AS EndDate
  FROM    YourTable
  WHERE   Allocation = 0
  UNION ALL
  -- Recursive step: extend a range by one day whenever the next calendar
  -- day is also a zero-allocation day.
  SELECT  r.StartDate, q.Date
  FROM    r
          INNER JOIN YourTable q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
  WHERE   q.Allocation = 0
)
SELECT  s.StartDate AS [Start Date]
        , s.EndDate AS [End Date]
        , DATEDIFF(dd, s.StartDate, s.EndDate) + 1 AS [DayCount]
FROM    (
          SELECT  r.StartDate
                  , r.EndDate
                  -- rn1 = 1: the latest EndDate reached from this StartDate.
                  , ROW_NUMBER() OVER (PARTITION BY r.StartDate ORDER BY r.EndDate DESC) AS rn1
                  -- rn2 = 1: the earliest StartDate that reaches this EndDate.
                  , ROW_NUMBER() OVER (PARTITION BY r.EndDate ORDER BY r.StartDate ASC) AS rn2
          FROM    r
        ) s
-- Only a maximal run satisfies both conditions at once.
WHERE   s.rn1 = 1
        AND s.rn2 = 1
-- Lift the recursion-depth limit so long runs are not cut off.
OPTION  (MAXRECURSION 0)

测试脚本

-- Self-contained check of the run-detection query against the sample rows
-- from the question (inline VALUES stand in for the real table).
;WITH q (ID, Date, Allocation) AS (
  SELECT  v.a, v.b, v.c
  FROM    (VALUES
            (1, '2012-01-01', 0)
            , (2, '2012-01-02', 2)
            , (3, '2012-01-03', 0)
            , (4, '2012-01-04', 0)
            , (5, '2012-01-05', 0)
            , (6, '2012-01-06', 5)
          ) v (a, b, c)
)
, r AS (
  -- Anchor: each zero-allocation day as a one-day range.
  SELECT  Date AS StartDate, Date AS EndDate
  FROM    q
  WHERE   Allocation = 0
  UNION ALL
  -- Recursive step: grow the range while the following day is also zero.
  SELECT  r.StartDate, q.Date
  FROM    r
          INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
  WHERE   q.Allocation = 0
)
, ranked AS (
  -- Rank candidate ranges so the maximal one per run can be picked out.
  SELECT  r.StartDate
          , r.EndDate
          , ROW_NUMBER() OVER (PARTITION BY r.StartDate ORDER BY r.EndDate DESC) AS rn1
          , ROW_NUMBER() OVER (PARTITION BY r.EndDate ORDER BY r.StartDate ASC) AS rn2
  FROM    r
)
SELECT  ranked.StartDate, ranked.EndDate, DATEDIFF(dd, ranked.StartDate, ranked.EndDate) + 1
FROM    ranked
WHERE   ranked.rn1 = 1
        AND ranked.rn2 = 1
OPTION  (MAXRECURSION 0)

答案 1 :(得分:3)

在这个答案中,我假设"id"字段在按日期升序排序时对行进行连续编号,就像示例数据中那样。(如果不存在这样的列,可以创建一个。)

这是另外两处(原文中的两个"here"链接)所描述技术的一个示例。

1)将表按相邻的"id"值与自身连接,使相邻的行两两成对。选出"allocation"字段发生变化的行对。将结果存储在临时表中,同时保留一个递增的序号(idx)。

-- MySQL syntax (user variable + CREATE TEMPORARY TABLE ... SELECT).
-- Builds one row per place where allocation changes between two rows with
-- adjacent ids, numbered with a running index idx.
-- NOTE(review): there is no ORDER BY, so idx follows whatever order the join
-- emits rows in; the later idx pairing presumably needs id order -- confirm.
SET @idx = 0;
CREATE TEMPORARY TABLE boundaries
SELECT
   (@idx := @idx + 1) AS idx,
   cur.date AS prev_end,
   nxt.date AS next_start,
   cur.allocation AS allocation
FROM allocations AS cur
INNER JOIN allocations AS nxt
   ON nxt.id = cur.id + 1
WHERE cur.allocation <> nxt.allocation;

这会为您提供一个表,其中每一行都包含"上一时段的结束日期"、"下一时段的开始日期"以及"上一时段的allocation值":

+------+------------+------------+------------+
| idx  | prev_end   | next_start | allocation |
+------+------------+------------+------------+
|    1 | 2012-01-01 | 2012-01-02 |          0 |
|    2 | 2012-01-02 | 2012-01-03 |          2 |
|    3 | 2012-01-05 | 2012-01-06 |          0 |
+------+------------+------------+------------+

2)我们需要把每个时段的开始和结束放在同一行中,因此需要再次组合相邻的行。为此创建第二个临时表boundaries2,其内容与boundaries相同,但idx字段的值大1:

+------+------------+------------+
| idx  | prev_end   | next_start |
+------+------------+------------+
|    2 | 2012-01-01 | 2012-01-02 |
|    3 | 2012-01-02 | 2012-01-03 |
|    4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+

现在加入idx字段,我们就会得到答案:

-- Pair each boundary row with the preceding boundary (held in boundaries2
-- under idx + 1): the period starts where the earlier boundary's next period
-- begins and ends where this boundary's previous period ends.
SELECT
  boundaries2.next_start AS start,
  boundaries.prev_end AS end,
  -- Qualified so the query still resolves if boundaries2 also carries an
  -- allocation column; the value wanted is that of the period ending here.
  boundaries.allocation AS allocation
FROM boundaries
INNER JOIN boundaries2
  USING (idx);

+------------+------------+------------+
| start      | end        | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 |          2 |
| 2012-01-03 | 2012-01-05 |          0 |
+------------+------------+------------+

**请注意,这个答案能正确得到"内部"时段,但会漏掉两个"边缘"时段:开头allocation = 0的时段和结尾allocation = 5的时段。这些可以用UNION子句补上,但我想在不引入这种复杂性的前提下展示核心思想。

答案 2 :(得分:1)

CTE的替代方式,但没有ROW_NUMBER(),

示例数据:

-- Recreate the #tab temp table and load the sample rows.
if object_id('tempdb..#tab') is not null
    drop table #tab

create table #tab (id int, date datetime, allocation int)

-- Explicit column list plus a VALUES row constructor: no reliance on column
-- order, and no implicit de-duplication as with a chain of UNIONs.
insert into #tab (id, date, allocation)
values
    (1, '2012-01-01', 0),
    (2, '2012-01-02', 2),
    (3, '2012-01-03', 0),
    (4, '2012-01-04', 0),
    (5, '2012-01-05', 0),
    (6, '2012-01-06', 5),
    (7, '2012-01-07', 0),
    (8, '2012-01-08', 5),
    (9, '2012-01-09', 0),
    (10, '2012-01-10', 0)

查询:

-- zero_days: each allocation = 0 row with the id of the following day's row
-- (e_id) and of the preceding day's row (b_id), filled only when that
-- neighbour is itself an allocation = 0 row.
;with zero_days (s_id, e_id, b_id) as (
    select cur.id, nxt.id, prv.id
    from #tab as cur
    left join #tab as nxt
        on nxt.date = dateadd(dd, 1, cur.date)
       and nxt.allocation = 0
    left join #tab as prv
        on prv.date = dateadd(dd, -1, cur.date)
       and prv.allocation = 0
    where cur.allocation = 0
),
-- runs: s is a row with no zero-allocation predecessor (run start); e is the
-- nearest row at or after it, by id, with no zero-allocation successor.
runs (s, e) as (
    select
        head.s_id,
        (
            select min(tail.s_id)
            from zero_days as tail
            where tail.e_id is null
              and tail.s_id >= head.s_id
        )
    from zero_days as head
    where head.b_id is null
)
select
    first_day.date as [start date],
    last_day.date as [end date],
    count(*) as [day count]
from runs
-- one joined row per zero-allocation row whose id lies inside the run
inner join #tab as member
    on member.id between runs.s and runs.e
   and member.allocation = 0
inner join #tab as first_day
    on first_day.id = runs.s
inner join #tab as last_day
    on last_day.id = runs.e
group by runs.s, runs.e, first_day.date, last_day.date

Live example at data.SE

答案 3 :(得分:1)

使用此样本数据:

-- Sample table and rows matching the data in the question.
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);
-- Column lists are spelled out so the inserts do not depend on column order.
INSERT INTO MyTable (ID, Date, Allocation) VALUES (1, {d '2012-01-01'}, 0);
INSERT INTO MyTable (ID, Date, Allocation) VALUES (2, {d '2012-01-02'}, 2);
INSERT INTO MyTable (ID, Date, Allocation) VALUES (3, {d '2012-01-03'}, 0);
INSERT INTO MyTable (ID, Date, Allocation) VALUES (4, {d '2012-01-04'}, 0);
INSERT INTO MyTable (ID, Date, Allocation) VALUES (5, {d '2012-01-05'}, 0);
INSERT INTO MyTable (ID, Date, Allocation) VALUES (6, {d '2012-01-06'}, 5);
GO

试试这个:

-- Groups consecutive Allocation = 0 days under the ID of the first day of
-- each run (SeedID), then reports start date, end date and day count per run.
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
    -- Anchor: zero-allocation days with no zero-allocation row on the
    -- previous day, i.e. the first day of each run. The row seeds itself.
    SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, MyTable.ID
      FROM MyTable
      LEFT JOIN MyTable Prev ON Prev.Date = DATEADD(d, -1, MyTable.Date) 
                            AND Prev.Allocation = 0
     WHERE Prev.ID IS NULL
       AND MyTable.Allocation = 0
    UNION ALL
    -- Recursive step: pull in the next day while it is also zero-allocation,
    -- carrying the run's SeedID along.
    SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, DateGroups.SeedID
      FROM MyTable
      INNER JOIN DateGroups ON MyTable.Date = DATEADD(d, 1, DateGroups.Date)
     WHERE MyTable.Allocation = 0

), StartDates (ID, StartDate, DayCount) AS (
    -- First day and number of days for each run.
    SELECT SeedID, MIN(Date), COUNT(ID)
      FROM DateGroups
     GROUP BY SeedID

), EndDates (ID, EndDate) AS (
    -- Last day for each run.
    SELECT SeedID, MAX(Date)
      FROM DateGroups
     GROUP BY SeedID

)
SELECT StartDates.StartDate, EndDates.EndDate, StartDates.DayCount
  FROM StartDates
  INNER JOIN EndDates ON StartDates.ID = EndDates.ID
-- One recursion level is used per day of a run; lift the default limit of
-- 100 levels so long runs do not abort the query.
OPTION (MAXRECURSION 0);

查询的第一部分是递归SELECT的锚点:它选出所有allocation = 0、且前一天不存在或前一天allocation != 0的行。对示例数据,这会返回ID 1和3,也就是您想要返回的各时段的开始日期。

同一查询的递归部分从锚点行出发,查找其后所有同样满足allocation = 0的连续日期。SeedID在所有迭代中记录锚点行的ID。

到目前为止的结果是:

ID          Date                    Allocation  SeedID
----------- ----------------------- ----------- -----------
1           2012-01-01 00:00:00.000 0           1
3           2012-01-03 00:00:00.000 0           3
4           2012-01-04 00:00:00.000 0           3
5           2012-01-05 00:00:00.000 0           3

下一个子查询使用简单的GROUP BY取出每个SeedID的开始日期,并计算天数。

最后一个子查询对结束日期做了同样的事情,但这次不需要日期计数,因为我们已经有了这个。

最终的SELECT查询将这两个结合在一起以组合开始日期和结束日期,并将它们与日期计数一起返回。

答案 4 :(得分:1)

如果适合您,请试试下面的查询。其中SDATE对应您表中的DATE列。

-- One row per run of consecutive ALLOCATION = 0 rows: first date (SDATE),
-- last date (EDATE) and number of rows in the run (DAYCOUNT).
-- Runs are walked by ID, not by date -- assumes ID increases with SDATE and
-- there is one row per day; TODO confirm against the real table.
SELECT
    TBL1.SDATE,
    CASE
        -- A non-zero row follows this run: the run ends on the last row
        -- strictly between the start row and that non-zero row.
        WHEN (
                SELECT COUNT(*) - 1
                FROM TABLE1
                WHERE ID BETWEEN TBL1.ID
                             AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION <> 0)
             ) > 0
        THEN COALESCE(
                (
                    SELECT SDATE
                    FROM TABLE1
                    WHERE ID = (
                            SELECT MAX(ID)
                            FROM TABLE1
                            WHERE ID > TBL1.ID
                              AND ID < (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION <> 0)
                          )
                ),
                TBL1.SDATE  -- nothing in between: a one-row run ends where it starts
             )
        -- No non-zero row follows: the run extends to the last row of the table.
        -- ">=" (not ">") so that a run starting on the very last row ends on
        -- itself instead of yielding NULL.
        ELSE (
                SELECT SDATE
                FROM TABLE1
                WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID >= TBL1.ID)
             )
    END AS EDATE,
    CASE
        -- COUNT(*) - 1 is -1 when no non-zero row follows (empty BETWEEN range):
        -- count from the start row through the last row of the table.
        WHEN (
                SELECT COUNT(*) - 1
                FROM TABLE1
                WHERE ID BETWEEN TBL1.ID
                             AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION <> 0)
             ) < 0
        THEN (
                SELECT COUNT(*)
                FROM TABLE1
                -- ">=" keeps a run starting on the last row at 1 instead of 0.
                WHERE ID BETWEEN TBL1.ID
                             AND (SELECT MAX(ID) FROM TABLE1 WHERE ID >= TBL1.ID)
             )
        -- Otherwise: rows from the start row up to the next non-zero row,
        -- minus that non-zero row itself.
        ELSE (
                SELECT COUNT(*) - 1
                FROM TABLE1
                WHERE ID BETWEEN TBL1.ID
                             AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION <> 0)
             )
    END AS DAYCOUNT
FROM TABLE1 TBL1
-- Keep only run starts: zero rows whose previous row (by ID) is non-zero
-- or does not exist.
WHERE TBL1.ALLOCATION = 0
  AND (
        (SELECT ALLOCATION FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)) <> 0
        OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID) IS NULL
      );

答案 5 :(得分:0)

没有CTE的解决方案:

-- Lists runs of allocation = 0 rows without using a CTE.
-- Every derived table is the same ranking of table1: idn is the number of
-- rows whose aDate is on or before the row's own aDate.
SELECT
    run_start.aDate AS StartDate,
    MIN(run_end.aDate) AS EndDate,
    (DATEDIFF(day, run_start.aDate, MIN(run_end.aDate)) + 1) AS DayCount
FROM (
    SELECT cur.aDate, cur.allocation, COUNT(*) AS idn
    FROM table1 AS cur
    INNER JOIN table1 AS earlier
        ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS run_start
-- Row ranked immediately before run_start with the same allocation;
-- it must be absent for run_start to begin a run.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) AS idn
    FROM table1 AS cur
    INNER JOIN table1 AS earlier
        ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS before_start
    ON run_start.idn = before_start.idn + 1
   AND before_start.allocation = run_start.allocation
-- Candidate end rows: same allocation, ranked at or after run_start.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) AS idn
    FROM table1 AS cur
    INNER JOIN table1 AS earlier
        ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS run_end
    ON run_start.idn <= run_end.idn
   AND run_end.allocation = run_start.allocation
-- Row ranked immediately after run_end with the same allocation;
-- it must be absent for run_end to close a run.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) AS idn
    FROM table1 AS cur
    INNER JOIN table1 AS earlier
        ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS after_end
    ON run_end.idn = after_end.idn - 1
   AND after_end.allocation = run_end.allocation
WHERE run_start.allocation = 0
  AND before_start.idn IS NULL
  AND run_end.idn IS NOT NULL
  AND after_end.idn IS NULL
-- MIN(run_end.aDate) per start picks the nearest run end at or after it.
GROUP BY run_start.aDate

Example