我有一张如下表格。
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 1 C
1 3/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
日期范围用于表示其余属性有效的时间段。我的问题是:存在若干连续的时间范围,其中属性保持不变,我想得到相同的数据,但不包含这些重复的行。
从前面的例子中,我预期的最终结果是这样的:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
我所做的是将第2行和第3行合并为一个(除日期之外的所有属性都相同),但我保留了第2行的StartDate和第3行的endDate。
我首先想到的是按属性分组,并取 MIN 和 MAX 的值:
SELECT ID, MIN(StartDate), MAX(EndDate), attributeA, attributeB
FROM MyTable
Group BY ID, AttributeA, AttributeB
但是一旦我运行它,我意识到当属性改变几次并回到它们的原始值时,我最终会重叠间隔。我已经被困了一段时间,试图弄清楚如何解决这个问题。
以下是我在之前发言中所说的一个例子。
初始数据如下所示:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 0 D
1 3/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 5/1/2009 6/1/2009 0 D
对结果进行分组最终会像以下一样
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 6/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
我想要获得的是这个
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 5/1/2009 6/1/2009 0 D
欢迎任何帮助:)
编辑:我将很快上传一些示例数据,以使我的问题更容易理解。
EDIT2:这里有一个包含我部分数据的脚本。从该样本中,我想得到的是以下几行。
ID StartDate EndDate A B C D E F
-- --------- ------- -- -- -- -- -- --
708513 1980-01-01 2006-07-23 15 ASDB A ACT 130 0
708513 2006-07-24 2009-12-08 15 ASDB A ACT 130 2
708513 2009-12-09 2010-01-12 0 ASDB A ACT 130 2
708513 2010-01-13 2079-05-30 15 ASDB A ACT 130 2
答案 0 :(得分:1)
根据评论进行了编辑。请尝试:
-- Collapse runs of rows that share the same ID and attributes (a..f) and whose
-- date ranges are adjacent (next StartDate = previous EndDate + 1 day) into a
-- single row per run, keeping the run's first StartDate and last EndDate.
;WITH merged_runs AS (
    -- Anchor: rows that open a run, i.e. there is no row with the same ID and
    -- attributes that ends on the day before this row starts.
    SELECT
        head.ID,
        head.StartDate,
        head.EndDate,
        head.a,
        head.b,
        head.c,
        head.d,
        head.e,
        head.f
    FROM sampledata AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM sampledata AS prev
        WHERE prev.ID = head.ID
          AND prev.a = head.a
          AND prev.b = head.b
          AND prev.c = head.c
          AND prev.d = head.d
          AND prev.e = head.e
          AND prev.f = head.f
          AND prev.EndDate = DATEADD(DAY, -1, head.StartDate)
    )

    UNION ALL

    -- Recursive step: extend a run with the matching row that starts the day
    -- after the run currently ends. The run keeps its original StartDate and
    -- takes over the EndDate of the appended row.
    SELECT
        run.ID,
        run.StartDate,
        nxt.EndDate,
        run.a,
        run.b,
        run.c,
        run.d,
        run.e,
        run.f
    FROM merged_runs AS run
    INNER JOIN sampledata AS nxt
        ON nxt.ID = run.ID
       AND nxt.a = run.a
       AND nxt.b = run.b
       AND nxt.c = run.c
       AND nxt.d = run.d
       AND nxt.e = run.e
       AND nxt.f = run.f
       AND nxt.StartDate = DATEADD(DAY, 1, run.EndDate)
)
-- Every run start produced one row per extension step; keep only the furthest
-- EndDate reached for each run start.
SELECT
    ID,
    StartDate,
    MAX(EndDate) AS EndDate,
    a,
    b,
    c,
    d,
    e,
    f
FROM merged_runs
GROUP BY
    ID,
    StartDate,
    a,
    b,
    c,
    d,
    e,
    f
-- Explicit recursion limit for the recursive CTE above.
OPTION (MAXRECURSION 32767)
答案 1 :(得分:0)
如果有人感兴趣,我制作了一个没有递归的版本。我没有真正弄清楚如何添加上一个示例中未使用的额外列。
-- Recreate the scratch table that holds the demo data: one row per ship and
-- StartDate, carrying the attribute values that apply from that date on.
IF OBJECT_ID('tempdb..#test') IS NOT NULL
    DROP TABLE #test

CREATE TABLE #test (
    id        INT IDENTITY(1, 1),
    ship      NVARCHAR(64),
    color     NVARCHAR(16),
    [length]  INT,
    height    INT,
    [type]    NVARCHAR(16),
    country   NVARCHAR(16),
    StartDate DATE
)

-- Sample rows: 'Ship 1' has three consecutive identical rows, then a change,
-- then a return to the original attribute values; 'Ship 2' has a single row.
INSERT INTO #test (ship, color, [length], height, [type], country, StartDate)
VALUES
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-01-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-02-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-03-01'),
    ('Ship 1', 'Red', 200, 13, 'motorboat', 'sweden', '2019-11-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-12-01'),
    ('Ship 2', 'Green', 400, 27, 'RoRo', 'denmark', '2019-02-01')
;
-- Merge consecutive rows of #test that carry identical attributes into a single
-- interval per run, without recursion.
--   step1: derive a provisional end date for every row from the ship's next row.
--   step2: flag rows that merely continue the preceding row's attribute values.
--   final: drop the flagged rows and recompute end dates over the rows that remain.
-- Open-ended intervals (no later row for the ship) end on 9999-12-31. The
-- sentinel is applied AFTER the one-day subtraction; passing it as the LEAD
-- default and then subtracting a day would yield 9999-12-30 instead.
-- Every window is ordered by (StartDate, id) so rows sharing a StartDate are
-- ordered deterministically and identically in all three window functions.
with step1 as (
    select t.id
        , t.ship
        , t.color
        , t.[length]
        , t.height
        , t.[type]
        , t.country
        , t.StartDate
        -- Provisional end: the day before the same ship's next row starts.
        , [EndDate] = coalesce(
            dateadd(day, -1, lead(t.StartDate) over (partition by t.ship order by t.StartDate, t.id))
            , cast('9999-12-31' as date)
        )
    from #test t
)
, step2 as (
    select t.id
        , t.ship
        , t.color
        , t.[length]
        , t.height
        , t.[type]
        , t.country
        , t.StartDate
        , t.EndDate
        -- 1 when the previous row with the same ship and attribute values ends
        -- the day before this row starts, i.e. this row only continues that run.
        -- The '1900-01-01' default makes the first row of each group compare unequal.
        , [IdenticalPreceeding] = case
            when t.StartDate = dateadd(day, 1, lag(t.EndDate, 1, '1900-01-01') over (
                    partition by t.ship, t.color, t.[length], t.height, t.[type], t.country
                    order by t.StartDate, t.id)) then 1
            else 0
        end
    from step1 t
)
select t.id
    , t.ship
    , t.color
    , t.[length]
    , t.height
    , t.[type]
    , t.country
    , t.StartDate
    -- EndDate is the per-row value from step1 (before merging); EndDateFinal is
    -- the end of the merged interval.
    , t.EndDate
    , t.IdenticalPreceeding
    -- The WHERE filter is applied before this window function is evaluated, so
    -- LEAD looks at the next surviving row of the ship.
    , [EndDateFinal] = coalesce(
        dateadd(day, -1, lead(t.StartDate) over (partition by t.ship order by t.StartDate, t.id))
        , cast('9999-12-31' as date)
    )
from step2 t
-- Keep only rows that start a new run of attribute values.
where t.IdenticalPreceeding = 0
order by t.ship
    , t.StartDate