我需要从以下数据集中确定开始日期和结束日期:
item date cost
12345 01/01/15 2.00
12345 01/02/15 2.00
12345 01/03/15 2.00
12345 01/04/15 2.00
12345 01/05/15 2.00
12345 01/06/15 2.00
12345 01/07/15 1.50
12345 01/08/15 1.50
12345 01/09/15 1.50
12345 01/10/15 1.50
12345 01/11/15 1.50
12345 01/12/15 1.50
12345 01/13/15 1.50
12345 01/14/15 2.00
12345 01/15/15 2.00
12345 01/16/15 2.00
12345 01/17/15 2.00
12345 01/18/15 2.00
12345 01/19/15 2.00
12345 01/20/15 2.00
12345 01/26/15 2.00
12345 01/27/15 2.00
12345 01/28/15 2.00
12345 01/29/15 2.00
如果可能,我想要的是以下输出:
item start end cost
12345 01/01/15 01/06/15 2.00
12345 01/07/15 01/13/15 1.50
12345 01/14/15 01/20/15 2.00
12345 01/26/15 01/29/15 2.00
基本上,任何时候更改,或任何给定成本的日期之间有超过4天的差距。
感谢。
答案 0 :(得分:0)
这是你可以做到的一种方式:
DECLARE @myTable TABLE (item INT, date DATE, cost DECIMAL(5, 2));
INSERT @myTable VALUES (12345, '01/01/15', 2.00), (12345, '01/02/15', 2.00)
, (12345, '01/03/15', 2.00), (12345, '01/04/15', 2.00), (12345, '01/05/15', 2.00)
, (12345, '01/06/15', 2.00), (12345, '01/07/15', 1.50), (12345, '01/08/15', 1.50)
, (12345, '01/09/15', 1.50), (12345, '01/10/15', 1.50), (12345, '01/11/15', 1.50)
, (12345, '01/12/15', 1.50), (12345, '01/13/15', 1.50), (12345, '01/14/15', 2.00)
, (12345, '01/15/15', 2.00), (12345, '01/16/15', 2.00), (12345, '01/17/15', 2.00)
, (12345, '01/18/15', 2.00), (12345, '01/19/15', 2.00), (12345, '01/20/15', 2.00)
, (12345, '01/26/15', 2.00), (12345, '01/27/15', 2.00), (12345, '01/28/15', 2.00)
, (12345, '01/29/15', 2.00);
WITH CTE1 AS (
SELECT item, date, cost, ROW_NUMBER() OVER (PARTITION BY item ORDER BY date) RN
FROM @myTable)
, CTE2 AS (
SELECT item, T.date TDate, T.cost Tcost, prevRow.date SDate, MD.maxDate, ROW_NUMBER() OVER (PARTITION BY item ORDER BY T.date) RN2
FROM CTE1 T
OUTER APPLY (
SELECT date, cost
FROM CTE1
WHERE RN = T.RN - 1
AND item = T.item
) prevRow
OUTER APPLY (
SELECT MAX(date) FROM CTE1
) MD(maxDate)
WHERE CASE WHEN DATEDIFF(DAY, prevRow.date, T.date) <= 4 THEN T.date END IS NULL
OR CASE WHEN prevRow.cost = T.cost THEN T.cost END IS NULL)
SELECT item, TDate startDate, ISNULL(nextRow.SDate, C.maxDate) endDate, TCost cost
FROM CTE2 C
OUTER APPLY (
SELECT SDate
FROM CTE2
WHERE RN2 = C.RN2 + 1
AND item = C.item) nextRow;
基本上,您希望将每一行与下一行进行比较,以检查是否存在成本差异或超过4天的日期差异。在较新版本的SQL Server(2012年起)中,您可以使用LEAD / LAG窗口函数,但我认为在SQL Server 2008中最简单的方法是为每行分配一个行号并使用apply。
CTE1指定行号。 CTE2用于与下一行进行比较。 最后的select语句会拉出每个期间的开始和结束日期,其中费用会发生变化,或者日期跳过大于4天。
答案 1 :(得分:0)
以下是@ZLK提供的变体,但仅使用连接而不应用。我们再次使用行号。第二个CTE获取第一行和最后一行以及每行有变化的行。最终查询负责选择正确的最终日期。
--set up data
declare @table table (item int, tdate date, cost decimal(5,2));
insert @table values
(12345,'01/01/15',2.00),
(12345,'01/02/15',2.00),
(12345,'01/03/15',2.00),
(12345,'01/04/15',2.00),
(12345,'01/05/15',2.00),
(12345,'01/06/15',2.00),
(12345,'01/07/15',1.50),
(12345,'01/08/15',1.50),
(12345,'01/09/15',1.50),
(12345,'01/10/15',1.50),
(12345,'01/11/15',1.50),
(12345,'01/12/15',1.50),
(12345,'01/13/15',1.50),
(12345,'01/14/15',2.00),
(12345,'01/15/15',2.00),
(12345,'01/16/15',2.00),
(12345,'01/17/15',2.00),
(12345,'01/18/15',2.00),
(12345,'01/19/15',2.00),
(12345,'01/20/15',2.00),
(12345,'01/26/15',2.00),
(12345,'01/27/15',2.00),
(12345,'01/28/15',2.00),
(12345,'01/29/15',2.00);
--select * from @table order by tdate;
--query
with cte as (
select *, row_number() over (partition by item order by tdate) row_num
from @table
),
cte2 as (
select c.item, c.tdate, c_pre.tdate pre_date, c_post.tdate post_date, c.cost, row_number() over (partition by c.item order by c.tdate) row_num
from cte c
left join cte c_pre
on c_pre.row_num = c.row_num - 1
left join cte c_post
on c_post.row_num = c.row_num + 1
where c_pre.row_num is null --first row
or c_post.row_num is null --last row
or c_pre.cost != c.cost --cost difference
or datediff(d, c_pre.tdate, c.tdate) > 4 --more than 4 days
)
select c.item, c.tdate start,
case when c_post.post_date is null then c_post.tdate else c_post.pre_date end [end], --adjustment for last row
c.cost
from cte2 c
left join cte2 c_post
on c_post.row_num = c.row_num + 1
where c.post_date is not null
order by c.tdate;