我有一个表列Id和EmployeeID。表数据具有以下特点:在某些部分(Id是连续的),有时可以找到相同的EmployeeID,例如
Id | EmployeeID
---------------
1 | 1
2 | 1
3 | 2
4 | 5
5 | 1
6 | 1
我想构建一个查询来查找包含相同EmployeeID的数据块,其中Id是连续的(最小值为x记录)。到目前为止,我提出了:
SELECT EmployeeID, MIN(Id), MAX(Id), COUNT(*)
FROM recs
GROUP BY EmployeeID
HAVING COUNT(*) > 5 AND
MAX(Id) - MIN(Id) + 1 = COUNT(*)
此查询将为我提供一些(但不是全部)数据块,只要同一个Employee只能在一个块中找到。任何人都可以提出一个解决方案,为每位员工提供所有不同的数据块吗?
答案 0 :(得分:2)
加入同一个表,其中table1.Id = table2.Id + 1和table1.employeeid = table2.employeeid
答案 1 :(得分:1)
不是最好的解决方案,但它应该有效(例如,3个连续的ID):
SELECT Id, EmployeeID FROM
(
SELECT r.Id, r.EmployeeID,
(SELECT COUNT(1) FROM recs r1 WHERE (r1.EmployeeID = r.EmployeeID AND r1.id = r.Id-1) AS c1,
(SELECT COUNT(1) FROM recs r2 WHERE (r2.EmployeeID = r.EmployeeID AND r2.id = r.Id-2) AS c2,
(SELECT COUNT(1) FROM recs r3 WHERE (r3.EmployeeID = r.EmployeeID AND r3.id = r.Id-3) AS c3
FROM recs r1) tab1
WHERE (tab1.c1+tab1.c2+tab1.c3 =3);
我建议Id是主要(或唯一)密钥。如果不是,你应该将每个子查询改为SELECT IF(COUNT(1)> 0,1,0)......
答案 2 :(得分:0)
使用临时表。使用此解决方案:
SELECT EmployeeID, MIN(Id) AS Min, MAX(Id) AS Max, COUNT(*) AS Count
INTO #TempTable
FROM recs
GROUP BY EmployeeID
SELECT * FROM #TempTable WHERE
Count > 5 AND
Max - Min + 1 = Count
编辑回答
请试试这个:
SELECT * FROM(
SELECT EmployeeID, MIN(Id) AS min, MAX(Id) AS max, COUNT(*) AS count
FROM recs
GROUP BY EmployeeID) AS Table
WHERE Table.count > 5 AND
Table.max - Table.min + 1 = Table.count
答案 3 :(得分:0)
If Exists(Select 1 From INFORMATION_SCHEMA.TABLES Where TABLE_NAME = 'recs')
DROP TABLE recs
GO
Create Table recs
(
Id int not null
, EmployeeId int not null
)
Insert recs(Id, EmployeeId)
Values (1,1) ,(2,1) ,(3,1) ,(4,2) ,(5,5) ,(6,1) ,(7,1) ,(8,1) ,(10,1)
,(11,1) ,(12,1) ,(13,2) ,(14,2) ,(15,2) ,(16,2)
接下来,您需要一个包含一系列数字的Tally或Numbers表。我只在这一个中放了500个元素,但考虑到数据的大小,你可能需要更多。 Tally表中的最大数字应该大于recs表中的最大ID。
Create Table dbo.Tally(Num int not null)
GO
;With Numbers As
(
Select ROW_NUMBER() OVER ( ORDER BY s1.object_id) As Num
From sys.columns as s1
)
Insert dbo.Tally(Num)
Select Num
From Numbers
Where Num < 500
现在为实际的解决方案。基本上,我使用一系列CTE来推断连续序列的起点和终点。
; With
Employees As
(
Select Distinct EmployeeId
From dbo.Recs
)
, SequenceGaps As
(
Select E.EmployeeId, T.Num, R1.Id
From dbo.Tally As T
Cross Join Employees As E
Left Join dbo.recs As R1
On R1.EmployeeId = E.EmployeeId
And R1.Id = T.Num
Where T.Num <= (
Select Max(R3.Id)
From dbo.Recs As R3
Where R3.EmployeeId = E.EmployeeId
)
)
, EndIds As
(
Select S.EmployeeId
, Case When S1.Id Is Null Then S.Id End As [End]
From SequenceGaps As S
Join SequenceGaps As S1
On S1.EmployeeId = S.EmployeeId
And S1.Num = (S.Num + 1)
Where S.Id Is Not Null
And S1.Id Is Null
Union All
Select S.EmployeeId, Max( Id )
From SequenceGaps As S
Where S.Id Is Not Null
Group By S.EmployeeId
)
, SequencedEndIds As
(
Select EmployeeId, [End]
, ROW_NUMBER() OVER (PARTITION BY EmployeeId ORDER BY [End]) As SequenceNum
From EndIds
)
, StartIds As
(
Select S.EmployeeId
, Case When S1.Id Is Null Then S.Id End As [Start]
From SequenceGaps As S
Join SequenceGaps As S1
On S1.EmployeeId = S.EmployeeId
And S1.Num = (S.Num - 1)
Where S.Id Is Not Null
And S1.Id Is Null
Union All
Select S.EmployeeId, 1
From SequenceGaps As S
Where S.Id = 1
)
, SequencedStartIds As
(
Select EmployeeId, [Start]
, ROW_NUMBER() OVER (PARTITION BY EmployeeId ORDER BY [Start]) As SequenceNum
From StartIds
)
, SequenceRanges As
(
Select S1.EmployeeId, Start, [End]
From SequencedStartIds As S1
Join SequencedEndIds As S2
On S2.EmployeeId = S1.EmployeeId
And S2.SequenceNum = S1.SequenceNum
)
Select *
From SequenceGaps As SG
Where Exists(
Select 1
From SequenceRanges As SR
Where SR.EmployeeId = SG.EmployeeId
And SG.Id Between SR.Start And SR.[End]
And ( SR.[End] - SR.[Start] + 1 ) >= @SequenceSize
)
使用WHERE子句中的final语句和@SequenceSize,可以控制返回哪些序列。