我正在使用SQL Server 2008 R2。我有一个名为EmployeeHistory的表,其中包含以下结构和示例数据:
EmployeeID Date DepartmentID SupervisorID
10001 20130101 001 10009
10001 20130909 001 10019
10001 20131201 002 10018
10001 20140501 002 10017
10001 20141001 001 10015
10001 20141201 001 10014
请注意,随着时间的推移,员工10001已经更换了2个部门和几个主管。我想要做的是列出按日期字段排序的每个部门中该员工的工作的开始和结束日期。因此,输出将如下所示:
EmployeeID DateStart DateEnd DepartmentID
10001 20130101 20131201 001
10001 20131201 20141001 002
10001 20141001 NULL 001
我打算使用以下查询对数据进行分区,但失败了。部门从001变为002然后又变回001.显然我不能通过DepartmentID进行分区......我确信我忽略了显而易见的事情。有帮助吗?提前谢谢你。
SELECT * ,ROW_NUMBER() OVER (PARTITION BY EmployeeID, DepartmentID
ORDER BY [Date]) RN FROM EmployeeHistory
答案 0 :(得分:9)
我会做这样的事情:
;WITH x
AS (SELECT *,
Row_number()
OVER(
partition BY employeeid
ORDER BY datestart) rn
FROM employeehistory)
SELECT *
FROM x x1
LEFT OUTER JOIN x x2
ON x1.rn = x2.rn + 1
或许它可能是x2.rn - 1.你必须看到。无论如何,你明白了。一旦你自己加入了表格,就可以过滤,分组,排序等,以获得你需要的东西。
答案 1 :(得分:5)
有点牵扯。最简单的是引用我为您创建的this SQL Fiddle产生确切结果。有一些方法可以改善它的性能或其他考虑因素,但这有望至少比某些替代方案更清晰。
要点是,首先获得数据的规范排名,然后使用它将数据分组,然后找到每个组的结束日期,然后消除任何中间行。 ROW_NUMBER()和CROSS APPLY在可读性方面做了很多帮助。
编辑2019:
事实上,由于某种原因,SQL Fiddle似乎确实被破坏了,但它似乎是SQL Fiddle网站上的一个问题。这是一个完整的版本,刚刚在SQL Server 2016上进行了测试:CREATE TABLE Source
(
EmployeeID int,
DateStarted date,
DepartmentID int
)
INSERT INTO Source
VALUES
(10001,'2013-01-01',001),
(10001,'2013-09-09',001),
(10001,'2013-12-01',002),
(10001,'2014-05-01',002),
(10001,'2014-10-01',001),
(10001,'2014-12-01',001)
SELECT *,
ROW_NUMBER() OVER (PARTITION BY EmployeeID ORDER BY DateStarted) AS EntryRank,
newid() as GroupKey,
CAST(NULL AS date) AS EndDate
INTO #RankedData
FROM Source
;
UPDATE #RankedData
SET GroupKey = beginDate.GroupKey
FROM #RankedData sup
CROSS APPLY
(
SELECT TOP 1 GroupKey
FROM #RankedData sub
WHERE sub.EmployeeID = sup.EmployeeID AND
sub.DepartmentID = sup.DepartmentID AND
NOT EXISTS
(
SELECT *
FROM #RankedData bot
WHERE bot.EmployeeID = sup.EmployeeID AND
bot.EntryRank BETWEEN sub.EntryRank AND sup.EntryRank AND
bot.DepartmentID <> sup.DepartmentID
)
ORDER BY DateStarted ASC
) beginDate (GroupKey);
UPDATE #RankedData
SET EndDate = nextGroup.DateStarted
FROM #RankedData sup
CROSS APPLY
(
SELECT TOP 1 DateStarted
FROM #RankedData sub
WHERE sub.EmployeeID = sup.EmployeeID AND
sub.DepartmentID <> sup.DepartmentID AND
sub.EntryRank > sup.EntryRank
ORDER BY EntryRank ASC
) nextGroup (DateStarted);
SELECT * FROM
(
SELECT *, ROW_NUMBER() OVER (PARTITION BY GroupKey ORDER BY EntryRank ASC) AS GroupRank FROM #RankedData
) FinalRanking
WHERE GroupRank = 1
ORDER BY EntryRank;
DROP TABLE #RankedData
DROP TABLE Source
答案 2 :(得分:0)
这似乎是一个常见的“隔岛”问题。行号rn1
和rn2
的两个序列之间的差异给出了“组”号。
逐个CTE运行此查询,并检查中间结果以了解其工作原理。
样本数据
我从问题中扩展了样本数据。
DECLARE @Source TABLE
(
EmployeeID int,
DateStarted date,
DepartmentID int
)
INSERT INTO @Source
VALUES
(10001,'2013-01-01',001),
(10001,'2013-09-09',001),
(10001,'2013-12-01',002),
(10001,'2014-05-01',002),
(10001,'2014-10-01',001),
(10001,'2014-12-01',001),
(10005,'2013-05-01',001),
(10005,'2013-11-09',001),
(10005,'2013-12-01',002),
(10005,'2014-10-01',001),
(10005,'2016-12-01',001);
查询SQL Server 2008
SQL Server 2008中没有LEAD
函数,因此我不得不通过OUTER APPLY
使用自联接来获取DateEnd
的“ next”行的值。 / p>
WITH
CTE
AS
(
SELECT
EmployeeID
,DateStarted
,DepartmentID
,ROW_NUMBER() OVER (PARTITION BY EmployeeID ORDER BY DateStarted) AS rn1
,ROW_NUMBER() OVER (PARTITION BY EmployeeID, DepartmentID ORDER BY DateStarted) AS rn2
FROM @Source
)
,CTE_Groups
AS
(
SELECT
EmployeeID
,MIN(DateStarted) AS DateStart
,DepartmentID
FROM CTE
GROUP BY
EmployeeID
,DepartmentID
,rn1 - rn2
)
SELECT
CTE_Groups.EmployeeID
,CTE_Groups.DepartmentID
,CTE_Groups.DateStart
,A.DateEnd
FROM
CTE_Groups
OUTER APPLY
(
SELECT TOP(1) G2.DateStart AS DateEnd
FROM CTE_Groups AS G2
WHERE
G2.EmployeeID = CTE_Groups.EmployeeID
AND G2.DateStart > CTE_Groups.DateStart
ORDER BY G2.DateStart
) AS A
ORDER BY
EmployeeID
,DateStart
;
查询SQL Server 2012 +
从SQL Server 2012开始,有一个LEAD
函数使此任务更加高效。
WITH
CTE
AS
(
SELECT
EmployeeID
,DateStarted
,DepartmentID
,ROW_NUMBER() OVER (PARTITION BY EmployeeID ORDER BY DateStarted) AS rn1
,ROW_NUMBER() OVER (PARTITION BY EmployeeID, DepartmentID ORDER BY DateStarted) AS rn2
FROM @Source
)
,CTE_Groups
AS
(
SELECT
EmployeeID
,MIN(DateStarted) AS DateStart
,DepartmentID
FROM CTE
GROUP BY
EmployeeID
,DepartmentID
,rn1 - rn2
)
SELECT
CTE_Groups.EmployeeID
,CTE_Groups.DepartmentID
,CTE_Groups.DateStart
,LEAD(CTE_Groups.DateStart) OVER (PARTITION BY CTE_Groups.EmployeeID ORDER BY CTE_Groups.DateStart) AS DateEnd
FROM
CTE_Groups
ORDER BY
EmployeeID
,DateStart
;
结果
+------------+--------------+------------+------------+
| EmployeeID | DepartmentID | DateStart | DateEnd |
+------------+--------------+------------+------------+
| 10001 | 1 | 2013-01-01 | 2013-12-01 |
| 10001 | 2 | 2013-12-01 | 2014-10-01 |
| 10001 | 1 | 2014-10-01 | NULL |
| 10005 | 1 | 2013-05-01 | 2013-12-01 |
| 10005 | 2 | 2013-12-01 | 2014-10-01 |
| 10005 | 1 | 2014-10-01 | NULL |
+------------+--------------+------------+------------+