如何在SQL中计算最长的连续出勤记录(streak)?

时间:2010-06-15 22:38:04

标签: tsql sql-server-2008

我有

  TABLE EMPLOYEE - ID,DATE,IsPresent

我想计算每位员工连续出勤的最长天数。员工没来的那些天,IsPresent 位为 false。也就是说,我想算出他连续到岗的最长天数……日期列的值是唯一的……所以我尝试了下面的写法:

-- Attempt from the question: Id is selected next to COUNT(*) with no GROUP BY,
-- and a plain count of present rows is a total, not the longest consecutive run.
Select Id,Count(*) from Employee where IsPresent=1

但是上面的方法不起作用……有人能指导我如何计算连续出勤的天数吗?……我相信大家都遇到过这种情况……我试过在网上搜索,但没能很好地理解……请帮帮我。

5 个答案:

答案 0 :(得分:4)

编辑:以下是该查询的SQL Server版本:

-- Longest run of consecutive calendar days present, per employee (SQL Server).
-- A streak is delimited by its first day (LowerBound) and its last day (UpperBound);
-- the n-th streak start of an employee is paired with that employee's n-th streak end.
with LowerBound as (
    -- Streak starts: present days with no present row on the previous calendar day.
    select second_day.EmployeeId
        , second_day."DATE" as LowerDate
        , row_number() over (partition by second_day.EmployeeId
            order by second_day."DATE") as RN
    from T second_day
    left join T first_day
        on first_day.EmployeeId = second_day.EmployeeId
        and first_day."DATE" = dateadd(day, -1, second_day."DATE")
        and first_day.IsPresent = 1
    where first_day.EmployeeId is null   -- anti-join: no present previous day found
    and second_day.IsPresent = 1)
, UpperBound as (
    -- Streak ends: present days with no present row on the following calendar day.
    select first_day.EmployeeId
        , first_day."DATE" as UpperDate
        , row_number() over (partition by first_day.EmployeeId
            order by first_day."DATE") as RN
    from T first_day
    left join T second_day
        on first_day.EmployeeId = second_day.EmployeeId
        and first_day."DATE" = dateadd(day, -1, second_day."DATE")
        and second_day.IsPresent = 1
    where second_day.EmployeeId is null  -- anti-join: no present following day found
    and first_day.IsPresent = 1)
-- EmployeeId is spelled exactly as in the CTEs so the query also resolves
-- under a case-sensitive database collation.
select LB.EmployeeId
    , max(datediff(day, LowerDate, UpperDate) + 1) as LongestStreak  -- +1: both bounds are present days
from LowerBound LB
inner join UpperBound UB
    on LB.EmployeeId = UB.EmployeeId
    and LB.RN = UB.RN
group by LB.EmployeeId

测试数据的SQL Server版本:

-- SQL Server test table: one attendance row per employee per calendar day.
create table T (
    EmployeeId int,               -- nullability not spelled out here; member of the primary key below
    "DATE"     date not null,
    IsPresent  bit  not null,     -- the queries treat 1 as present
    constraint T_PK primary key (EmployeeId, "DATE")
)


-- Sample attendance rows for T (SQL Server 2008+ multi-row VALUES).
-- The explicit column list keeps the statement valid if T later gains or reorders columns.
insert into T (EmployeeId, "DATE", IsPresent)
values
    -- employee 1: a single present day
    (1, '2000-01-01', 1),
    -- employee 2: never present
    (2, '2000-01-01', 0),
    -- employee 3: two separate 2-day runs
    (3, '2000-01-01', 0),
    (3, '2000-01-02', 1),
    (3, '2000-01-03', 1),
    (3, '2000-01-04', 0),
    (3, '2000-01-05', 1),
    (3, '2000-01-06', 1),
    (3, '2000-01-07', 0),
    -- employee 4: one 5-day run
    (4, '2000-01-01', 0),
    (4, '2000-01-02', 1),
    (4, '2000-01-03', 1),
    (4, '2000-01-04', 1),
    (4, '2000-01-05', 1),
    (4, '2000-01-06', 1),
    (4, '2000-01-07', 0),
    -- employee 5: a 1-day run followed by a 3-day run
    (5, '2000-01-01', 0),
    (5, '2000-01-02', 1),
    (5, '2000-01-03', 0),
    (5, '2000-01-04', 1),
    (5, '2000-01-05', 1),
    (5, '2000-01-06', 1),
    (5, '2000-01-07', 0);

很抱歉,下面的原始查询是用Oracle编写的,所以请用相应的SQL Server日期运算来代替。

假设:

  • 日期是日期值或 DateTime与时间组件 00:00:00。
  • 主键是 (EmployeeId, Date)
  • 所有字段均为not null
  • 如果某位员工缺少某个日期的记录,则视为当天不在场。(这用于处理数据序列的开头和结尾,但也意味着中间缺失的日期会打断连续记录。这可能是个问题,取决于具体需求。)

    -- Oracle: longest run of consecutive calendar days present, per employee.
    -- StreakStart / StreakEnd hold the first and last day of every run; the n-th
    -- start of an employee belongs to the same run as that employee's n-th end.
    with StreakStart as (
        -- present days that have no present row one day earlier
        select cur.EmployeeId,
               cur."DATE" as LowerDate,
               row_number() over (partition by cur.EmployeeId order by cur."DATE") as RN
        from T cur
        left join T prev
            on prev.EmployeeId = cur.EmployeeId
           and prev."DATE" = cur."DATE" - 1
           and prev.IsPresent = 1
        where cur.IsPresent = 1
          and prev.EmployeeId is null
    ),
    StreakEnd as (
        -- present days that have no present row one day later
        select cur.EmployeeId,
               cur."DATE" as UpperDate,
               row_number() over (partition by cur.EmployeeId order by cur."DATE") as RN
        from T cur
        left join T nxt
            on nxt.EmployeeId = cur.EmployeeId
           and cur."DATE" = nxt."DATE" - 1
           and nxt.IsPresent = 1
        where cur.IsPresent = 1
          and nxt.EmployeeId is null
    )
    select s.EmployeeId,
           max(e.UpperDate - s.LowerDate + 1) as LongestStreak  -- date difference is in days; +1 counts both ends
    from StreakStart s
    inner join StreakEnd e
        on e.EmployeeId = s.EmployeeId
       and e.RN = s.RN
    group by s.EmployeeId
    

测试数据:

    -- Oracle test table: one attendance row per employee per day.
    create table T (
        EmployeeId number(38),
        -- the check rejects values that carry a time-of-day component
        "DATE"     date   not null check ("DATE" = trunc("DATE")),
        -- numeric flag restricted to 0 or 1
        IsPresent  number not null check (IsPresent in (0, 1)),
        constraint T_PK primary key (EmployeeId, "DATE")
    )
    /

    -- Sample attendance rows for the Oracle table T.
    -- Each statement names its target columns so it stays valid if T gains or reorders columns.
    -- employee 1: a single present day
    insert into T (EmployeeId, "DATE", IsPresent) values (1, to_date('2000-01-01', 'YYYY-MM-DD'), 1);
    -- employee 2: never present
    insert into T (EmployeeId, "DATE", IsPresent) values (2, to_date('2000-01-01', 'YYYY-MM-DD'), 0);
    -- employee 3: two separate 2-day runs
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-01', 'YYYY-MM-DD'), 0);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-02', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-03', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-04', 'YYYY-MM-DD'), 0);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-05', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-06', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (3, to_date('2000-01-07', 'YYYY-MM-DD'), 0);
    -- employee 4: one 5-day run
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-01', 'YYYY-MM-DD'), 0);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-02', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-03', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-04', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-05', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-06', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (4, to_date('2000-01-07', 'YYYY-MM-DD'), 0);
    -- employee 5: a 1-day run followed by a 3-day run
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-01', 'YYYY-MM-DD'), 0);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-02', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-03', 'YYYY-MM-DD'), 0);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-04', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-05', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-06', 'YYYY-MM-DD'), 1);
    insert into T (EmployeeId, "DATE", IsPresent) values (5, to_date('2000-01-07', 'YYYY-MM-DD'), 0);

答案 1 :(得分:4)

缺少 GROUP BY。

统计整个办公室(所有人)的出勤人日总数。

-- Query quoted from the question. With no GROUP BY the aggregate spans every
-- matching row; SQL Server rejects selecting Id next to COUNT(*) in that form.
Select Id,Count(*) from Employee where IsPresent=1

统计每位员工的出勤天数。

-- Total number of present rows per employee (a total, not a consecutive run).
select e.Id
     , count(*)
from Employee e
where e.IsPresent = 1
group by e.Id;

但这仍然不好,因为它计算出席的总天数,而不是连续出席的时间。

您需要做的是使用另一个日期列date2构建临时表。 date2设置为今天。该表是员工缺席的所有日期的列表。

-- Work table of absences: one row per (id, date) on which the employee was absent.
-- date2 starts out as the current date; it is meant to be replaced by the
-- employee's next absence date in a following step.
-- SQL Server has no CREATE ... AS SELECT; SELECT ... INTO creates the table instead.
-- NOTE(review): assumes a schema named tmpdb already exists - confirm.
select e.id
     , e.date
     , cast(getdate() as date) as date2
into tmpdb.absentdates
from EMPLOYEE e
where e.IsPresent = 0;

所以诀窍是计算两个缺席天之间的日期差异,以找出连续存在的天数。 现在,在每个员工的下一个缺席日期填写date2。每位员工的最新记录将不会更新,但保留今天的值,因为在数据库中没有比今天更多的日期记录。

-- For every absence row, set date2 to the same employee's next later absence date.
-- The subquery is correlated on the row being updated (a1); without that
-- correlation every row would receive one and the same value.
update a1
set date2 = (select min(a2.date)
             from tmpdb.absentdates a2
             where a2.id = a1.id
               and a2.date > a1.date)
from tmpdb.absentdates a1
-- Only touch rows that have a later absence, so each employee's last row
-- keeps its existing date2 instead of being overwritten with NULL.
where exists (select 1
              from tmpdb.absentdates a2
              where a2.id = a1.id
                and a2.date > a1.date);

上述查询通过对自身执行连接来更新自身,并可能导致死锁查询,因此最好创建临时表的两个副本。

-- Copy 1: the plain list of absences, used later as the read-only lookup side.
-- NOTE(review): assumes a schema named tmpdb already exists - confirm.
select e.id
     , e.date
into tmpdb.absentdatesX
from EMPLOYEE e
where e.IsPresent = 0;

-- Copy 2: the same rows plus date2, initialised to the current date.
-- Columns are listed explicitly rather than with *, and SELECT ... INTO is used
-- because SQL Server has no CREATE ... AS SELECT.
select x.id
     , x.date
     , cast(getdate() as date) as date2
into tmpdb.absentdates
from tmpdb.absentdatesX x;

您需要插入招聘日期,假设数据库中每位员工的最早日期是招聘日期。

-- Add one row per employee for the earliest recorded date (treated as the hire
-- date), so the stretch before the first absence is also measured.
insert into tmpdb.absentdates (id, date, date2)
select e.id
     , min(e.date)
     , cast(getdate() as date)
from EMPLOYEE e
group by e.id
-- Skip employees whose earliest recorded day is itself an absence: that
-- (id, date) row is already in the table and must not be duplicated.
having min(case when e.IsPresent = 0 then e.date end) is null
    or min(e.date) < min(case when e.IsPresent = 0 then e.date end);

现在更新date2,使用下一个稍后的缺席日期,以便能够执行date2 - date。

-- Set date2 to the employee's next later absence date, read from the separate
-- copy absentdatesX so the statement does not read the table it is updating.
-- The subquery is correlated on the updated row (a).
update a
set date2 = (select min(x.date)
             from tmpdb.absentdatesX x
             where x.id = a.id
               and x.date > a.date)
from tmpdb.absentdates a
-- Rows without a later absence keep their current date2.
where exists (select 1
              from tmpdb.absentdatesX x
              where x.id = a.id
                and x.date > a.date);

这将列出员工每段连续在场的天数:

-- Distinct calendar-day spans (date -> date2) per employee.
-- T-SQL DATEDIFF takes the date part first, then the start and the end value.
-- NOTE: for a row that begins on an absence, the span includes that absence day.
select distinct
       a.id
     , datediff(day, a.date, a.date2) as continuousPresence
from tmpdb.absentdates a
order by a.id, continuousPresence;

但你只想要最长的连续记录:

-- Longest calendar-day span (date -> date2) per employee.
-- The column alias belongs outside the aggregate call, and T-SQL DATEDIFF
-- needs the date part as its first argument.
select a.id
     , max(datediff(day, a.date, a.date2)) as continuousPresence
from tmpdb.absentdates a
group by a.id
order by a.id;

然而,上面的查询仍然有问题,因为datediff没有考虑假期和周末。

因此,我们依赖记录的数量作为合法的工作日。

-- Count attendance rows instead of calendar days, so days with no row at all
-- (weekends, holidays) do not inflate a streak.
-- One output row per interval [date, date2) that contains at least one present row.
-- NOTE(review): assumes a schema named tmpdb already exists - confirm.
select a.id
     , a.date
     , a.date2
     , count(*) as continuousPresence
into tmpdb.absentCount
from EMPLOYEE e
inner join tmpdb.absentdates a
    on a.id = e.id
where e.IsPresent = 1      -- leave out the absence row that opens the interval
  and e.date >= a.date
  and e.date < a.date2
-- every non-aggregated select column is grouped, including date2
group by a.id, a.date, a.date2;

请记住,每次使用聚合函数(如count、avg)时,都需要按选择列表中的非聚合列进行分组。

现在选出最长的连续记录:

-- Largest interval count per employee.
select c.id
     , max(c.continuousPresence)
from tmpdb.absentCount c
group by c.id

列出最长连续记录对应的日期:

-- Interval rows whose count equals the employee's maximum. Ties return several rows.
-- The maximum is computed in a derived table and joined back, because columns
-- that are not grouped cannot be selected next to "group by id".
select c.id
     , c.date
     , c.date2
     , c.continuousPresence
from tmpdb.absentCount c
inner join (select id
                 , max(continuousPresence) as longest
            from tmpdb.absentCount
            group by id) m
    on m.id = c.id
   and m.longest = c.continuousPresence;

上面可能存在一些错误(sql server tsql),但这是一般的想法。

答案 2 :(得分:1)

试试这个:

-- For every present day, look up the closest absence before it and the closest
-- absence after it for the same employee. Either bound is NULL when none exists.
select cur.Id
     , cur.date
     , (select max(prev.date)
        from employee prev
        where prev.IsPresent = 0
          and prev.Id = cur.Id
          and prev.date < cur.date) as StreakStartDate
     , (select min(nxt.date)
        from employee nxt
        where nxt.IsPresent = 0
          and nxt.Id = cur.Id
          and nxt.date > cur.date) as StreakEndDate
from employee cur
where cur.IsPresent = 1

然后找出每位员工的最长连续记录:

-- Longest streak per employee from the bounding absence dates.
-- DATEDIFF takes the date part first, then start and end in that order; the
-- reversed order would give negative values. Both bounds are absence days, so
-- the days strictly between them are the difference minus one.
-- Rows with a NULL bound (first or last streak) give NULL and are ignored by MAX.
select id, max(datediff(day, StreakStartDate, StreakEndDate) - 1)
from (<use subquery above>) as streaks  -- a derived table needs an alias
group by id

我不完全确定此查询的语法是否正确,因为我手头暂时没有数据库。另外请注意,StreakStartDate 和 StreakEndDate 两列保存的并不是员工在场的第一天和最后一天,而是与之相邻的最近缺席日期。如果表中日期的间隔大致相等,这并不影响结果;否则查询会变得更复杂,因为需要找出最近的在场日期。此外,做了这种改进之后,还能处理最长连续记录恰好是第一段或最后一段的情况。

主要思路是:对员工在场的每个日期,都找出其所在连续记录的起点和终点。

对于表中员工在场的每一行,连续记录的起点取该员工缺席、且早于当前行日期的最大日期。

答案 3 :(得分:1)

这是另一个版本,以不同的方式处理缺失的日期。假设您只保存工作日的记录,并且希望本周一至周五加上下周一至周五的出勤算作连续十天。此查询假定一系列行中间缺失的日期是非工作日。(go 之前是原来按日历日计算的查询,go 之后是新的版本。)

-- Calendar-day version: longest run of consecutive days present, per employee.
-- A date with no row at all breaks a streak here.
with LowerBound as (
    -- Streak starts: present days with no present row on the previous calendar day.
    select second_day.EmployeeId
        , second_day."DATE" as LowerDate
        , row_number() over (partition by second_day.EmployeeId
            order by second_day."DATE") as RN
    from T second_day
    left join T first_day
        on first_day.EmployeeId = second_day.EmployeeId
        and first_day."DATE" = dateadd(day, -1, second_day."DATE")
        and first_day.IsPresent = 1
    where first_day.EmployeeId is null   -- anti-join: no present previous day found
    and second_day.IsPresent = 1)
, UpperBound as (
    -- Streak ends: present days with no present row on the following calendar day.
    select first_day.EmployeeId
        , first_day."DATE" as UpperDate
        , row_number() over (partition by first_day.EmployeeId
            order by first_day."DATE") as RN
    from T first_day
    left join T second_day
        on first_day.EmployeeId = second_day.EmployeeId
        and first_day."DATE" = dateadd(day, -1, second_day."DATE")
        and second_day.IsPresent = 1
    where second_day.EmployeeId is null  -- anti-join: no present following day found
    and first_day.IsPresent = 1)
-- EmployeeId is spelled exactly as in the CTEs so the query also resolves
-- under a case-sensitive database collation.
select LB.EmployeeId
    , max(datediff(day, LowerDate, UpperDate) + 1) as LongestStreak  -- +1: both bounds are present days
from LowerBound LB
inner join UpperBound UB
    on LB.EmployeeId = UB.EmployeeId
    and LB.RN = UB.RN
group by LB.EmployeeId

go

-- Row-based version: a streak is a run of consecutive *rows* with IsPresent = 1,
-- so a date that has no row at all does not break the run.
with NumberedRows as (
    -- Position of every attendance row within its employee's history, by date.
    select EmployeeId
        , "DATE"
        , IsPresent
        , row_number() over (partition by EmployeeId
            order by "DATE") as RN
    from T)
, LowerBound as (
    -- Streak starts: present rows whose preceding row is missing or not present.
    select SecondRow.EmployeeId
        , SecondRow.RN
        , row_number() over (partition by SecondRow.EmployeeId
            order by SecondRow.RN) as LowerBoundRN
    from NumberedRows SecondRow
    left join NumberedRows FirstRow
        on FirstRow.IsPresent = 1
        and FirstRow.EmployeeId = SecondRow.EmployeeId
        and FirstRow.RN + 1 = SecondRow.RN
    where FirstRow.EmployeeId is null
    and SecondRow.IsPresent = 1)
, UpperBound as (
    -- Streak ends: present rows whose following row is missing or not present.
    select FirstRow.EmployeeId
        , FirstRow.RN
        , row_number() over (partition by FirstRow.EmployeeId
            order by FirstRow.RN) as UpperBoundRN
    from NumberedRows FirstRow
    left join NumberedRows SecondRow
        on SecondRow.IsPresent = 1
        and FirstRow.EmployeeId = SecondRow.EmployeeId
        and FirstRow.RN + 1 = SecondRow.RN
    where SecondRow.EmployeeId is null
    and FirstRow.IsPresent = 1)
-- The n-th start pairs with the n-th end; the row-number difference + 1 is the
-- number of rows in the streak. The result column is named like the
-- calendar-day version's.
select LB.EmployeeId
    , max(UB.RN - LB.RN + 1) as LongestStreak
from LowerBound LB
inner join UpperBound UB
    on LB.EmployeeId = UB.EmployeeId
    and LB.LowerBoundRN = UB.UpperBoundRN
group by LB.EmployeeId

答案 4 :(得分:0)

我曾经做过类似的事情:计算消防员连续多少天每天至少值班15分钟。

你的情况稍微简单一些。

如果您可以假设没有员工连续出勤超过32天,可以使用公用表表达式(CTE)。但更好的方法是使用临时表和while循环。

您需要一个名为StartingRowID的列。不断将临时表与employeeWorkDay表连接,取得员工下一个连续的工作日,并把这些行插回临时表。当 @@ROWCOUNT = 0 时,就已经捕获了最长的连续记录。

现在按StartingRowID聚合,即可得到最长连续记录的第一天。我时间不够了,否则我会附上一些示例代码。