我有一个MS SQL 2008数据库表,如下所示:
Registration | Date | DriverID | TrailerID
一些数据的示例如下:
AB53EDH,2013/07/03 10:00,54,23
AB53EDH,2013/07/03 10:01,54,23
...
AB53EDH,2013/07/03 10:45,54,23
AB53EDH,2013/07/03 10:46,54,NULL <-- Trailer changed
AB53EDH,2013/07/03 10:47,54,NULL
...
AB53EDH,2013/07/03 11:05,54,NULL
AB53EDH,2013/07/03 11:06,54,102 <-- Trailer changed
AB53EDH,2013/07/03 11:07,54,102
...
AB53EDH,2013/07/03 12:32,54,102
AB53EDH,2013/07/03 12:33,72,102 <-- Driver changed
AB53EDH,2013/07/03 12:34,72,102
如您所见,数据表示在任何时间点哪个驱动程序和哪个预告片附加到哪个注册。我想要做的是生成一个报告,其中包含每个驱动程序和预告片组合都处于活动状态的句点。因此对于上面的示例数据,我想要生成如下所示的内容:
Registration,StartDate,EndDate,DriverID,TrailerID
AB53EDH,2013/07/03 10:00,2013/07/03 10:45,54,23
AB53EDH,2013/07/03 10:46,2013/07/03 11:05,54,NULL
AB53EDH,2013/07/03 11:06,2013/07/03 12:32,54,102
AB53EDH,2013/07/03 12:33,2013/07/03 12:34,72,102
你将如何通过SQL实现这一目标?
更新:感谢目前为止的答案。不幸的是,当我将它应用于我的生产数据时,他们停止了工作。到目前为止提交的查询在部分数据上应用时无法正常工作。
以下是一些示例查询,用于生成数据表并使用上面的虚拟数据填充它。此处的数据比上例中的数据更多:驱动程序,预告片组合54,23和54,NULL已重复,以确保查询识别出这些是两个不同的组。我还使用不同的日期范围复制了相同的数据三次,以便测试在部分数据集上运行时查询是否有效:
-- Sample table: one row per reading of the driver and trailer attached to a
-- vehicle registration at a given moment.
CREATE TABLE dbo.TempTable
(
    Registration nvarchar(50) NOT NULL,
    [Date]       datetime     NOT NULL,
    DriverID     int          NULL,   -- nullable: the sample data contains rows without a driver
    TrailerID    int          NULL    -- nullable: the sample data contains rows without a trailer
);
-- Seed rows for 2013-07-03. Several driver/trailer combinations (54/23,
-- 54/NULL, 54/102) appear in two separate runs, so a query can be checked for
-- reporting them as distinct periods.
INSERT INTO dbo.TempTable (Registration, [Date], DriverID, TrailerID)
VALUES
    ('AB53EDH', '2013/07/03 10:00', 54,   23),
    ('AB53EDH', '2013/07/03 10:01', 54,   23),
    ('AB53EDH', '2013/07/03 10:45', 54,   23),
    ('AB53EDH', '2013/07/03 10:46', 54,   NULL),
    ('AB53EDH', '2013/07/03 10:47', 54,   NULL),
    ('AB53EDH', '2013/07/03 11:05', 54,   NULL),
    ('AB53EDH', '2013/07/03 11:06', 54,   102),
    ('AB53EDH', '2013/07/03 11:07', 54,   102),
    ('AB53EDH', '2013/07/03 12:32', 54,   102),
    ('AB53EDH', '2013/07/03 12:33', 72,   102),
    ('AB53EDH', '2013/07/03 12:34', 72,   102),
    ('AB53EDH', '2013/07/03 13:00', 54,   102),
    ('AB53EDH', '2013/07/03 13:01', 54,   102),
    ('AB53EDH', '2013/07/03 13:02', 54,   102),
    ('AB53EDH', '2013/07/03 13:03', 54,   102),
    ('AB53EDH', '2013/07/03 13:04', 54,   23),
    ('AB53EDH', '2013/07/03 13:05', 54,   23),
    ('AB53EDH', '2013/07/03 13:06', 54,   23),
    ('AB53EDH', '2013/07/03 13:07', 54,   NULL),
    ('AB53EDH', '2013/07/03 13:08', 54,   NULL),
    ('AB53EDH', '2013/07/03 13:09', 54,   NULL),
    ('AB53EDH', '2013/07/03 13:10', 54,   NULL),
    ('AB53EDH', '2013/07/03 13:11', NULL, NULL);
-- Copy the July rows one month back, giving an identical data set in June.
INSERT INTO dbo.TempTable (Registration, [Date], DriverID, TrailerID)
SELECT
    src.Registration,
    DATEADD(MONTH, -1, src.[Date]),
    src.DriverID,
    src.TrailerID
FROM dbo.TempTable AS src
WHERE src.[Date] > '2013/07/01';

-- Copy the July rows one month forward, giving an identical data set in August.
-- The June copies inserted above fail the date filter, so only the original
-- July rows are duplicated here.
INSERT INTO dbo.TempTable (Registration, [Date], DriverID, TrailerID)
SELECT
    src.Registration,
    DATEADD(MONTH, 1, src.[Date]),
    src.DriverID,
    src.TrailerID
FROM dbo.TempTable AS src
WHERE src.[Date] > '2013/07/01';
答案 0 :(得分:3)
此查询使用CTE:
链接到SQL Fiddle
以下代码:
-- Report every contiguous period during which a Registration kept the same
-- DriverID/TrailerID combination (NULLs count as a value of their own).
-- Assumes [Date] is unique per Registration; with ties the row numbering, and
-- therefore the period boundaries, would not be deterministic.
;WITH c AS (
    -- Number each registration's rows in date order
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY Registration
            ORDER BY [Date]) AS Rn,
        Registration,
        [Date],
        DriverID,
        TrailerID
    FROM
        TempTable
)
,c2 AS (
    -- Self join to fetch driver and trailer of the previous row.
    -- PrevRn stays NULL for the first row of each registration.
    SELECT
        t1.Rn,
        t1.Registration,
        t1.[Date],
        t1.DriverID,
        t1.TrailerID,
        t2.Rn AS PrevRn,
        t2.DriverID AS PrevDriverID,
        t2.TrailerID AS PrevTrailerID
    FROM
        c AS t1
    LEFT JOIN
        c AS t2
        ON t2.Registration = t1.Registration
        AND t2.Rn = t1.Rn - 1
)
,c3 AS (
    -- Flag the rows that start a new period.
    -- INTERSECT treats NULLs as equal, which gives a NULL-safe comparison of
    -- the (driver, trailer) pair. The first row of a registration is flagged
    -- explicitly: its "previous" values are NULL only because the LEFT JOIN
    -- found nothing, and would otherwise match a (NULL, NULL) first row.
    SELECT
        Rn,
        Registration,
        [Date],
        DriverID,
        TrailerID,
        CASE
            WHEN PrevRn IS NULL THEN 1
            WHEN EXISTS (
                SELECT DriverID, TrailerID
                INTERSECT
                SELECT PrevDriverID, PrevTrailerID)
            THEN 0
            ELSE 1
        END AS IsNew
    FROM c2
)
-- One output row per period start. The period ends on the last row before the
-- next period start, or on the registration's last row when no later period
-- exists (previously that case fell back to the start date).
SELECT
    s.Registration,
    s.[Date] AS StartDate,
    e.EndDate,
    s.DriverID,
    s.TrailerID
FROM
    c3 AS s
OUTER APPLY (
    -- Row number of the next period start; NULL for the final period
    SELECT MIN(n.Rn) AS NextStartRn
    FROM c3 AS n
    WHERE n.Registration = s.Registration
        AND n.Rn > s.Rn
        AND n.IsNew = 1
) AS nx
CROSS APPLY (
    -- Latest date inside [s.Rn, NextStartRn); always includes the start row
    SELECT MAX(r.[Date]) AS EndDate
    FROM c3 AS r
    WHERE r.Registration = s.Registration
        AND r.Rn >= s.Rn
        AND (nx.NextStartRn IS NULL OR r.Rn < nx.NextStartRn)
) AS e
WHERE
    s.IsNew = 1
ORDER BY
    s.Registration,
    s.[Date]
答案 1 :(得分:1)
尝试 - :
-- Table variable mirroring the question's table, loaded with the original
-- eleven sample rows.
DECLARE @TempTable TABLE
(
    Registration nvarchar(50) NOT NULL,
    [Date]       datetime     NOT NULL,
    DriverID     int          NULL,
    TrailerID    int          NULL
);

INSERT INTO @TempTable (Registration, [Date], DriverID, TrailerID)
VALUES
    ('AB53EDH', '2013-07-03 10:00', 54, 23),
    ('AB53EDH', '2013-07-03 10:01', 54, 23),
    ('AB53EDH', '2013-07-03 10:45', 54, 23),
    ('AB53EDH', '2013-07-03 10:46', 54, NULL),
    ('AB53EDH', '2013-07-03 10:47', 54, NULL),
    ('AB53EDH', '2013-07-03 11:05', 54, NULL),
    ('AB53EDH', '2013-07-03 11:06', 54, 102),
    ('AB53EDH', '2013-07-03 11:07', 54, 102),
    ('AB53EDH', '2013-07-03 12:32', 54, 102),
    ('AB53EDH', '2013-07-03 12:33', 72, 102),
    ('AB53EDH', '2013-07-03 12:34', 72, 102);
-- Report every contiguous period during which a Registration kept the same
-- DriverID/TrailerID combination.
-- Grouping by the combination alone would merge separate runs of the same
-- combination, so each run gets its own RunId: the difference between the
-- row's position within its registration and its position within its
-- combination is constant inside a run and differs between runs of the same
-- combination. PARTITION BY places NULLs in one partition, so NULL driver or
-- trailer values are handled like any other value.
-- Assumes [Date] is unique per Registration.
SELECT
    runs.Registration,
    MIN(runs.[Date]) AS StartDate,
    MAX(runs.[Date]) AS EndDate,
    runs.DriverID,
    runs.TrailerID
FROM (
    SELECT
        Registration,
        [Date],
        DriverID,
        TrailerID,
        ROW_NUMBER() OVER (PARTITION BY Registration ORDER BY [Date])
      - ROW_NUMBER() OVER (PARTITION BY Registration, DriverID, TrailerID ORDER BY [Date]) AS RunId
    FROM @TempTable
) AS runs
GROUP BY
    runs.Registration,
    runs.DriverID,
    runs.TrailerID,
    runs.RunId
ORDER BY
    MIN(runs.[Date]);
答案 2 :(得分:1)
这是一种使用相关子查询的方法:
-- First attempt at the period report using correlated subqueries.
-- KNOWN FLAW: prevDate is the latest earlier row with the same
-- Registration/DriverID/TrailerID anywhere in the data, not the immediately
-- preceding row, so a combination that reappears after a different one is not
-- recognised as the start of a new period.
with tt as (
-- For each row, prevDate = most recent earlier Date among rows with the same
-- Registration, DriverID and TrailerID (NULL TrailerIDs compare as equal);
-- NULL when no such earlier row exists. DriverID is compared with plain "=",
-- so a row whose DriverID is NULL always gets prevDate = NULL.
select tt.*,
(select top 1 date
from TempTable tt2
where tt2.Registration = tt.Registration and
tt2.DriverID = tt.DriverID and
(tt2.TrailerID = tt.TrailerID or tt2.TrailerID is null and tt.TrailerID is null) and
tt2.Date < tt.Date
order by date desc
) prevDate
from TempTable tt
)
-- Aggregate each group of rows into one start/end row.
select registration, min(date) as startdate, max(date) as enddate, driverid, trailerid
from (select tt.*,
-- grp = latest Date, not later than this row, among CTE rows whose prevDate
-- is NULL. This lookup is not filtered by Registration.
(select top 1 date
from tt tt3
where prevDate is NULL and
tt3.Date <= tt.date
order by Date desc
) as grp
from TempTable tt
) tt
group by grp, Registration, DriverID, trailerid;
CTE在注册,驱动程序和trailerid上执行lag(date)
,生成记录的上一个日期。这是记录序列开头的NULL
。
子查询然后在给定记录之前或之前的NULL记录上查找最新日期。这充当分组变量。此时序列中的所有内容都具有相同的grp
。
最终查询将其聚合为您想要的格式。
这是一个复杂的查询。 SQL Server 2012中的语法可以使用lag()
和累积聚合函数进行简化。使用这些功能,您可以采用基本相同的方法。
编辑:
哎哟。上述查询在计算上一个日期时存在逻辑错误。该修复需要假设日期在数据中是唯一的。
上面的错误是它查找列的三元组匹配的上一个日期。愚蠢,愚蠢,愚蠢。因为可以存在匹配但在数据中较早的三元组。相反,它需要获取上一个日期,然后查看三元组是否匹配。
以下通过附加连接实现此目的。它在SQL Fiddle上运行here。
-- Report every contiguous period during which a Registration kept the same
-- DriverID/TrailerID combination, using correlated subqueries only.
-- Assumes [Date] is unique per Registration.
with tt as (
    -- PrevDate is non-NULL only when the immediately preceding row of the same
    -- registration carries the same driver and trailer; a NULL PrevDate
    -- therefore marks the first row of a period.
    select cur.*, prev.[Date] as PrevDate
    from (select t.*,
                 -- Date of the row just before this one for the same
                 -- registration (other registrations must not interfere)
                 (select top 1 t2.[Date]
                  from TempTable t2
                  where t2.Registration = t.Registration and
                        t2.[Date] < t.[Date]
                  order by t2.[Date] desc
                 ) as PrevDate1
          from TempTable t
         ) cur left outer join
         TempTable prev
         on prev.[Date] = cur.PrevDate1 and
            prev.Registration = cur.Registration and
            -- both IDs are nullable, so compare each of them NULL-safely
            (prev.DriverID = cur.DriverID or prev.DriverID is null and cur.DriverID is null) and
            (prev.TrailerID = cur.TrailerID or prev.TrailerID is null and cur.TrailerID is null)
)
select r.Registration,
       count(*) as RowsInPeriod,
       min(r.[Date]) as StartDate,
       max(r.[Date]) as EndDate,
       r.DriverID,
       r.TrailerID
from (select t.*,
             -- grp = start date of the period this row belongs to: the latest
             -- period start of the same registration that is not later than
             -- the row itself
             (select top 1 s.[Date]
              from tt s
              where s.PrevDate is null and
                    s.Registration = t.Registration and
                    s.[Date] <= t.[Date]
              order by s.[Date] desc
             ) as grp
      from TempTable t
     ) r
group by r.grp, r.Registration, r.DriverID, r.TrailerID
order by r.Registration, min(r.[Date]);