SQL在组中查找连续数字

时间:2015-08-05 16:10:00

标签: sql sql-server

我有一个类似于所示的表格。它包含用户ID列表,一天中每小时的小时值以及用于确定该用户在该小时内是否可用的Avail标志。

我需要列出所有连续可用若干小时的用户ID,所需的连续小时数由 @n 指定。

#####################
# UID # Avail # Hour#
#####################
# 123 #   1   #  0  #
# 123 #   1   #  1  #
# 123 #   0   #  2  #
# 123 #   0   #  3  #
# 123 #   0   #  4  #
# 123 #   1   #  5  #
# 123 #   1   #  6  #
# 123 #   1   #  7  #
# 123 #   1   #  8  #
# 341 #   1   #  0  #
# 341 #   1   #  1  #
# 341 #   0   #  2  #
# 341 #   1   #  3  #
# 341 #   1   #  4  #
# 341 #   0   #  5  #
# 341 #   1   #  6  # 
# 341 #   1   #  7  #
# 341 #   0   #  8  #
######################

当 @n = 3 时,将得到以下输出:
#######
# UID #
#######
# 123 #
#######

我试图使用 ROW_NUMBER() OVER (PARTITION BY UID, Avail ORDER BY UID, Hour) 为每一行分配一个编号,即按 UID 以及是否被标记为可用进行分区。但是这不起作用,因为可用时段每天可能变化多次,而 ROW_NUMBER() 函数只会按 Avail 标志为每个用户维护两个计数。

4 个答案:

答案 0 :(得分:3)

如果您使用的是 SQL Server 2012+,则可以使用窗口 SUM,但是必须提前在窗口框架中指定行数,因为它不接受变量,因此不那么灵活:

-- For every row, total the avail values of that row and the two rows that
-- follow it (ordered by hour within the same user). Assuming avail only
-- holds 0 or 1, a total of 3 means all three rows in the window are
-- available. DISTINCT collapses the per-row results to one row per
-- (UID, total) pair.
;WITH windowed AS
(
    SELECT DISTINCT
        UID,
        SUM(avail) OVER (
            PARTITION BY uid
            ORDER BY hour
            ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING
        ) AS avail_in_window
    FROM table1
)
SELECT uid
FROM windowed
WHERE avail_in_window = 3;

如果您需要灵活性,可以将其作为存储过程并使用动态SQL来构建和执行语句,如下所示:

-- testproc: lists the UIDs in table1 for which some window of @n rows
-- (ordered by hour within a user) has avail values summing to @n.
--
-- Parameters:
--   @n  number of consecutive rows required; must be a positive integer.
--
-- The number of rows in the window frame is spliced into the statement text
-- (dynamic SQL) because it cannot be supplied as a variable. @n is an int,
-- so the spliced text is always numeric. The final comparison is bound as a
-- real sp_executesql parameter.
create procedure testproc (@n int) as
begin
    -- A NULL or non-positive @n would build a malformed frame clause
    -- (for example "and -1 following"); fail with a clear message instead.
    if @n is null or @n < 1
    begin
        raiserror(N'testproc: @n must be a positive integer.', 16, 1);
        return;
    end;

    declare @sql nvarchar(max);
    set @sql = concat(N'
    ;with cte as 
    (
       select distinct 
          UID, 
          SUM(avail) over (partition by uid 
                        order by hour 
                        rows between current row and ', @n - 1, N' following
                        ) count 
       from table1
    )
    select uid from cte where count = @n;');

    exec sp_executesql @sql, N'@n int', @n = @n;
end

并使用 execute testproc 3 来执行它。

另一个更不灵活的解决方案是使用相关子查询,因为每增加一个连续小时,就必须再添加一个子查询:

-- A user qualifies when one of their available hours is directly followed
-- by available rows at hour + 1 and hour + 2. One EXISTS probe is needed
-- per additional hour, so the run length of 3 is fixed by the query shape.
SELECT DISTINCT uid
FROM Table1 AS start_hour
WHERE start_hour.Avail = 1
  AND EXISTS (
        SELECT 1
        FROM Table1 AS next_hour
        WHERE next_hour.UID = start_hour.UID
          AND next_hour.Hour = start_hour.Hour + 1
          AND next_hour.Avail = 1
      )
  AND EXISTS (
        SELECT 1
        FROM Table1 AS third_hour
        WHERE third_hour.UID = start_hour.UID
          AND third_hour.Hour = start_hour.Hour + 2
          AND third_hour.Avail = 1
      );

还有另外一种方法,使用row_number查找岛屿,然后按每个岛屿的可用总和进行过滤:

-- Islands approach: within one user, rows of an unbroken run with the same
-- avail value share the same difference between their overall row number
-- and their row number inside the (uid, avail) partition. That difference
-- (grp) labels each run.
;with c as (
    select 
       uid, avail, 
       row_number() over (partition by uid order by hour) 
       - row_number() over (partition by uid, avail order by hour) as grp
    from table1
)

-- Keep users owning at least one run whose avail total reaches 3.
-- DISTINCT is required: grouping is per (uid, grp), so a user with several
-- qualifying runs would otherwise be listed once per run.
select distinct uid from c
group by uid, grp
having sum(avail) >= 3;

答案 1 :(得分:1)

这个方法可行……它按 UID 以及 @n(3 小时)的小时偏移量对表做自连接,然后只返回匹配到 3 条记录的用户。

-- Self-join: each available row A is a candidate start hour. It is paired
-- with the available rows B of the same user inside the 3-hour window
-- [A.Hour, A.Hour + 2]. A start hour qualifies when all 3 hours of its
-- window are present and available.
--
-- The previous join condition (A.Hour + 3 <= B.Hour) paired A with hours
-- after the window and counted distinct hours per user, so it never tested
-- whether the hours were consecutive.
SELECT DISTINCT A.UID
FROM foo A
INNER JOIN foo B
    ON A.UID = B.UID
   AND B.Hour BETWEEN A.Hour AND A.Hour + 2
WHERE A.Avail = 1
  AND B.Avail = 1
-- Grouping by start hour keeps each window's count separate; DISTINCT then
-- collapses users with several qualifying start hours to a single row.
GROUP BY A.UID, A.Hour
HAVING COUNT(DISTINCT B.Hour) = 3;

http://sqlfiddle.com/#!6/f97ee

答案 2 :(得分:1)

没有时间对此进行改进......但这是一种选择。

  • 首先CTE(c)创建新列Id
  • 第二个CTE(mx)获取最大行数,因为您无法在递归CTE中使用聚合
  • 最终的CTE(rc)是核心逻辑所在。

    -- Walks the rows of #tmp one at a time in (UID, Hour) order and keeps a
    -- running length of the current availability run in CummulativeHour.
    ;WITH c AS (
        -- Number every row so the recursion can step from row Id to Id + 1.
        SELECT ROW_NUMBER() OVER (ORDER BY [UID],[Hour]) Id, 
            [UID],Avail,[Hour]
        FROM #tmp
    ), mx AS (
        -- Highest row number, used as the stop condition of the recursion.
        SELECT MAX(Id) MaxRowCount FROM c
    ), rc AS (
    
        -- Anchor: the first row starts the counter with its own Avail value.
        SELECT Id, [UID], Avail, [Hour], c.Avail AS CummulativeHour
        FROM c
        WHERE Id = 1
    
        UNION ALL
    
        -- Step: extend the run only when the previous and current rows are
        -- both available, belong to the same user AND are exactly one hour
        -- apart; otherwise restart the counter from the current Avail value.
        -- Without the hour check, a missing hour row (e.g. 5, 7, 8) would be
        -- counted as an unbroken run.
        SELECT c.Id, c.[UID], c.Avail, c.[Hour],
               CASE
                   WHEN rc.Avail = 1 AND c.Avail = 1
                        AND rc.[UID] = c.[UID]
                        AND c.[Hour] = rc.[Hour] + 1
                       THEN rc.CummulativeHour + 1
                   ELSE c.Avail
               END AS CummulativeHour
        FROM rc
        JOIN c
            ON rc.Id + 1 = c.Id
        WHERE c.Id <= (SELECT mx.MaxRowCount FROM mx)
    
    )
    SELECT Id, [UID], Avail, [Hour], CummulativeHour
    FROM rc
    -- One recursion level is used per row, so the default limit of 100
    -- levels would abort on larger tables; 0 removes the limit.
    OPTION (MAXRECURSION 0)
    

以下是样本数据创建...

-- Sample data: one row per user (UID) and hour, with Avail = 1 when the
-- user is available during that hour.
CREATE TABLE #tmp ([UID] INT, Avail INT, [Hour] INT)

-- Rows mirror the table shown in the question. (123,1,6) and (341,1,1) were
-- missing from this script and are restored so every hour 0-8 is present
-- for both users.
INSERT INTO #tmp
        ( UID, Avail, Hour )
VALUES  (123,1,0),
(123,1,1),
(123,0,2),
(123,0,3),
(123,0,4),
(123,1,5),
(123,1,6),
(123,1,7),
(123,1,8),
(341,1,0),
(341,1,1),
(341,0,2),
(341,1,3),
(341,1,4),
(341,0,5),
(341,1,6),
(341,1,7),
(341,0,8)

答案 3 :(得分:0)

下面由几个CTE组成的查询提供了多种可能,可以显示您需要的内容(每个用户的最大连续小时数、至少连续 N 小时的用户等)。只需修改CTE下方的最后一个查询即可。

创建表格和数据:

-- Sample data held in a table variable: one row per user (uid) and hour of
-- the day (h), with avail = 1 when the user is available during that hour.
DECLARE @hours TABLE (
    uid   INT,
    avail BIT,
    h     TINYINT
);

INSERT INTO @hours (uid, avail, h)
VALUES
    (123, 1, 0),
    (123, 1, 1),
    (123, 0, 2),
    (123, 0, 3),
    (123, 0, 4),
    (123, 1, 5),
    (123, 1, 6),
    (123, 1, 7),
    (123, 1, 8),
    (341, 1, 0),
    (341, 1, 1),
    (341, 0, 2),
    (341, 1, 3),
    (341, 1, 4),
    (341, 0, 5),
    (341, 1, 6),
    (341, 1, 7),
    (341, 0, 8),
    (341, 1, 23); -- loop through midnight

添加最后一行是为了说明该查询可以检测跨越午夜的连续时间(请参阅后面的CTE),即 uid 341 从 23 点到凌晨 2 点。

查询每个用户的最大连续小时数:

-- Reports, for each user, the longest run(s) of consecutive available hours
-- found in @hours, including runs that continue past midnight.
--
-- hs: remove duplicates, out-of-range hours (h >= 24) and unavailable hours
;with hs as (
    Select distinct uid, h from @hours where avail = 1 and h < 24 
), loop(uid, first, last, diff) as (
    -- loop through successive hours
    -- anchor: each available hour h is a run with first = h, last = h + 1
    -- (the hour right after the run) and diff = 1 (run length in hours)
    select uid, cast(h as tinyint), cast(h+1 as int), cast(1 as int) from hs
    union all
    -- step: when the user also has an available row at hour "last", extend
    -- the run by one hour
    select l.uid, l.first, l.last+1, l.diff+1 from loop as l
    inner join hs as h on l.uid = h.uid and l.last = h.h
), back(uid, first, last, diff) as (
    -- search for successive hours around midnight
    -- every run from loop is kept as is ...
    select uid, first, last, diff from loop
    union
    -- ... plus, per user, each run ending at 24 glued to each run starting
    -- at hour 0; the lengths of the two runs are added together
    select l1.uid, l1.first, l2.last, l1.diff+l2.diff from loop as l1
    inner join loop as l2 on l1.uid = l2.uid and l1.last = 24 and l2.first = 0
), mx(uid, diff) as (
    -- get max continuous hours per user
    select uid, max(diff) from back group by uid
)
-- main query, change it based on what you need (see below)
-- as written: the run(s) of each user whose length equals that user's maximum
select b.uid, b.first, b.last, b.diff from back as b
inner join mx as m on m.uid = b.uid and m.diff = b.diff
order by uid, diff desc

结果:

uid first   last    diff
123 5       9       4
341 23      2       3 <= present because I added 341/1/23. Would not be here otherwise

获取至少连续可用 3 小时的用户(用下面这条语句替换上面查询中最后的 select):

-- Replacement for the final SELECT of the CTE query above (it reads the
-- "back" CTE): one row per uid having a run of at least 3 hours.
select distinct uid from back where diff >= 3 -- @n goes here

请注意,我把 (123,1,5) 视为从 5 点到 6 点的 1 个小时。因此 5 到 8 这几行表示从 5 点到 9 点,共 4 个小时。