Sql Server - 为每个ID获取更改次数值的最快方法

时间:2017-02-03 13:38:35

标签: sql sql-server tsql rdbms

我正在编写SQL查询以根据以下要求显示不同类型的结果:

  1. 显示在给定时间范围内,值从 1 变为 0 的次数最多的记录
  2. 显示在给定时间范围内,值从 1 变为 0 的次数最少的记录
  3. 显示在给定时间范围内,值从 1 变为 0 的次数最多的前 10 条记录

示例数据:

    +----------+-------------+-------------+
    | DeviceId | CaptureTime | SensorValue |
    +----------+-------------+-------------+
    |  DC001   | 02/01/2017  |      0      |
    |  DC001   | 02/02/2017  |      1      |
    |  DC001   | 02/03/2017  |      0      |
    |  DC001   | 02/04/2017  |      1      |
    |  DC001   | 02/05/2017  |      0      |
    |  DC001   | 02/07/2017  |      1      |
    |  DC001   | 02/08/2017  |      0      |
    |  DC001   | 02/10/2017  |      1      |
    |  DC001   | 02/01/2017  |      0      |
    |  DC001   | 02/01/2017  |      0      |
    |  DC002   | 02/02/2017  |      1      |
    |  DC002   | 02/02/2017  |      0      |
    |  DC002   | 02/02/2017  |      1      |
    |  DC002   | 02/02/2017  |      1      |
    |  DC002   | 02/02/2017  |      1      |
    |  DC002   | 02/03/2017  |      1      |
    |  DC002   | 02/03/2017  |      0      |
    |  DC002   | 02/03/2017  |      0      |
    |  DC002   | 02/03/2017  |      1      |
    |  DC002   | 02/03/2017  |      1      |
    |  DC003   | 02/03/2017  |      1      |
    |  DC003   | 02/03/2017  |      1      |
    |  DC003   | 02/03/2017  |      0      |
    |  DC003   | 02/03/2017  |      1      |
    |  DC003   | 02/03/2017  |      1      |
    |  DC003   | 02/04/2017  |      1      |
    |  DC003   | 02/05/2017  |      1      |
    |  DC003   | 02/06/2017  |      1      |
    |  DC003   | 02/07/2017  |      1      |
    |  DC003   | 02/08/2017  |      1      |
    |  DC004   | 02/09/2017  |      0      |
    |  DC004   | 02/10/2017  |      0      |
    |  DC004   | 02/11/2017  |      1      |
    |  DC004   | 02/12/2017  |      0      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC004   | 02/12/2017  |      1      |
    |  DC005   | 02/12/2017  |      0      |
    |  DC005   | 02/12/2017  |      0      |
    |  DC005   | 02/12/2017  |      0      |
    |  DC005   | 02/12/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    |  DC005   | 02/14/2017  |      0      |
    +----------+-------------+-------------+
    

    我为这三个要求编写了下面这个通用查询:

    -- Per DeviceId, counts the readings taken within the last @HoursBack hours whose
    -- SensorValue is 0 and differs from the SensorValue of the latest earlier reading,
    -- then returns the TOP (@TopRows) devices ordered by that count.
    -- The three variables are only declared here; their values are supplied per requirement.
    DECLARE @HoursBack  INT
            , @MinMax   VARCHAR(3)
            , @TopRows  INT
    
    SELECT  TOP (@TopRows) COUNT(TD1.DeviceId) PickedNoOfTimes, ItemName -- I have removed table to get ItemName to simplify this query
    FROM    tTrayDetails AS TD1
    WHERE   TD1.SensorValue = 0
    AND     TD1.CaptureTime > DATEADD(HOUR, -@HoursBack, GETDATE())
    -- Correlated scalar subquery: looks up the reading with the greatest CaptureTime
    -- that is strictly earlier than the current row.
    -- NOTE(review): the subquery does not filter on DeviceId, so the "previous" reading
    -- can belong to any device - presumably a per-device comparison was intended; confirm.
    -- NOTE(review): this lookup is evaluated against each candidate row, which is
    -- likely the main cost of the query - verify with the execution plan.
    AND     TD1.SensorValue <> (
                                    SELECT  TOP 1 SensorValue
                                    FROM    tTrayDetails TD2
                                    WHERE   TD2.CaptureTime < TD1.CaptureTime
                                    ORDER BY TD2.CaptureTime DESC
                                )
    GROUP BY    TD1.DeviceId
    -- Only one of the two CASE expressions is non-NULL for a given @MinMax:
    -- 'Max' sorts by count descending, 'Min' sorts by count ascending.
    -- For any other value both keys are NULL and the row order is unspecified.
    ORDER BY    CASE WHEN @MinMax = 'Max' THEN COUNT(TD1.DeviceId) END DESC
                , CASE WHEN @MinMax = 'Min' THEN COUNT(TD1.DeviceId) END ASC
    

    此查询适用于所有三个要求,只需为 @HoursBack、@MinMax 和 @TopRows 变量设置不同的值。

    以下是为我的三个要求设置的值

    1. @HoursBack = 24, @MinMax = 'Max', @TopRows = 1
    2. @HoursBack = 24, @MinMax = 'Min', @TopRows = 1
    3. @HoursBack = 24, @MinMax = 'Max', @TopRows = 10

    现在的问题是:在测试环境中,仅有 14K 条记录,此查询就需要大约 40 秒才能执行完。

      在生产环境中,每天会添加2-4K记录,因此查询执行时间会增加。

      应如何修改查询,使其在数据量很大时也能更快地运行?

2 个答案:

答案 0 :(得分:2)

这将仅计算SensorValue从1更改为0的那些行:

-- Counts, per device, the readings that are a 1 -> 0 transition:
-- the previous reading of the same device (by CaptureTime) is 1 and the current one is 0.
WITH readings AS
 (
   SELECT
      t.DeviceId,
      t.SensorValue,
      -- SensorValue of the preceding reading of the same device; NULL for the first one
      LAG(t.SensorValue) OVER (PARTITION BY t.DeviceId
                               ORDER BY t.CaptureTime) AS PrevSensorValue
   FROM tTrayDetails AS t
   -- placeholder from the answer: put the row filter (e.g. the time window) here
   WHERE ....
 )
SELECT r.DeviceId, COUNT(*)
FROM readings AS r
WHERE r.PrevSensorValue = 1
  AND r.SensorValue = 0
GROUP BY r.DeviceId

现在应用您的TOP / ORDER BY ...

答案 1 :(得分:1)

这是:

-- Returns the TOP (@topRows) devices ranked by how many 1 -> 0 transitions of
-- SensorValue they had within the last @hoursBack hours before @now.
-- @minMax = 'min' ranks fewest transitions first; any other value ranks most first.
DECLARE @topRows   int         = 2;
DECLARE @minMax    nvarchar(3) = 'max';
DECLARE @hoursBack int         = 1000;
DECLARE @now       datetime    = GETDATE();

WITH readings AS (
    -- every reading inside the time window, paired with the previous reading of the same device
    SELECT
        t.DeviceId,
        t.SensorValue,
        LAG(t.SensorValue) OVER (PARTITION BY t.DeviceId ORDER BY t.CaptureTime) AS PrevSensorValue
    FROM tTrayDetails AS t
    WHERE t.CaptureTime > DATEADD(HOUR, -@hoursBack, @now)
),
per_device AS (
    -- number of 1 -> 0 transitions per device; devices without any transition get 0
    SELECT
        r.DeviceId,
        SUM(CASE WHEN r.PrevSensorValue = 1 AND r.SensorValue = 0 THEN 1 ELSE 0 END) AS Val
    FROM readings AS r
    GROUP BY r.DeviceId
)
SELECT TOP (@topRows)
    d.DeviceId,
    d.Val
FROM per_device AS d
ORDER BY
    -- ascending count for 'min', descending (negated count) otherwise
    CASE WHEN @minMax = 'min' THEN d.Val ELSE -d.Val END,
    d.DeviceId

以及对应的测试脚本:

-- Self-contained test script: builds a small fixture in a temp table, runs the
-- transition-count query against it and cleans up afterwards.
-- Expected result with the values below: DC001 -> 2, DC002 -> 1.

-- Guard so the script can be re-run in the same session after an aborted run.
if object_id('tempdb..#tTrayDetails') is not null
    drop table #tTrayDetails;

create table #tTrayDetails
(
    DeviceId nvarchar(128),
    CaptureTime datetime not null,
    SensorValue int not null
);

insert into #tTrayDetails(DeviceId, CaptureTime, SensorValue) values
    ('DC001', '2017-01-01 01:00:00', 0),
    ('DC001', '2017-01-01 02:00:00', 1),
    ('DC001', '2017-01-02 01:00:00', 0),
    ('DC001', '2017-01-03 01:00:00', 1),
    ('DC001', '2017-01-04 01:00:00', 0),
    ('DC002', '2017-01-01 01:00:00', 0),
    ('DC002', '2017-01-01 02:00:00', 0),
    ('DC002', '2017-01-01 03:00:00', 1),
    ('DC002', '2017-01-01 04:00:00', 1),
    ('DC002', '2017-01-01 05:00:00', 1),
    ('DC002', '2017-01-01 06:00:00', 0),
    ('DC003', '2017-01-01 06:00:00', 0);

declare
    @topRows int = 2,
    @minMax nvarchar(3) = 'max',
    @hoursBack int = 1000,
    -- Pinned reference time instead of getdate(): the fixture rows are fixed in
    -- January 2017, so a wall-clock "now" would push every row out of the
    -- @hoursBack window and the script would return nothing.
    @now datetime = '2017-02-03T00:00:00';

;with _raw
as (
    -- Val = 1 for a reading that is 0 while the previous reading of the same device was 1
    select
        DeviceId,
        case when SensorValue = 0 and lag(SensorValue) over (partition by DeviceId order by CaptureTime) = 1
            then 1
            else 0 
        end as Val
    from #tTrayDetails
    where
        CaptureTime > dateadd(hour, -@hoursBack, @now)
)
, _combined
as (
    -- transitions per device; Ord is the sort key: the count itself for 'min',
    -- the negated count otherwise, so an ascending sort puts the largest count first
    select
        DeviceId,
        sum(Val) as Val,
        (case when @minMax = 'min' then 1 else -1 end) * sum(Val) as Ord
    from _raw
    group by
        DeviceId
)
select top(@topRows)
    DeviceId, Val
from _combined
order by
    Ord, DeviceId;


drop table #tTrayDetails;