SQL进入和退出点查询的特定问题

时间:2019-04-10 09:16:11

标签: sql sql-server gaps-and-islands

背景: 我有一些仪器数据,其中包含数十万条记录。我通过按几分钟一段的时间片对数据进行汇总来简化它。原始数据在某个时间点会有一个特定的读数,例如0.004;但由于时间片数据现在是汇总数据,每个时间片包含的是该时间片内同一读数的 Min / Max / Avg 三个值。

我正在尝试在此数据上创建入口点和出口点。我花了几天尝试解决的一个问题是,我不希望出口点与入口点处于同一时间段。它必须具有更高的PKey。

例如,这是错误的,因为第一条记录在同一PKey上进入和退出。 enter image description here

查看数据,我期望的入口点和出口点如下

入口 3550724 - 出口 3551615

入口 3559070 - 出口 3571982

入口 3575126 - 出口 NULL

如下

enter image description here

有人可以帮我纠正查询吗?我需要出口点始终晚于入口点,两者不能相等。如果没有出口,则仍显示该入口,但出口为 NULL。

我当前的查询:

-- Asker's current attempt, kept exactly as posted. Per instrument/test it marks
-- candidate entry and exit rows, groups the rows from an entry up to the next
-- exit, and returns the first row of each group together with that group's exit.
-- Parameters: MinS1 is tested against @EntryMinS1, MaxT1 against @ExitMaxT1,
-- Pressure against @MinPressure, and year(TimeSlice) against @YearFrom.
declare @EntryMinS1 float = 0.00418848167539267;
declare @ExitMaxT1 float = 0.00429319371727749;
declare @MinPressure float = 209.424083769634;
declare @YearFrom int = 2017;

with cte2 AS 
 ( -- apply your logic to mark potential entry and exit rows
   -- possibleEntry / possibleExit carry the row's own pKey when the test passes, else NULL
   SELECT *
     ,CASE WHEN [Pressure] >= @MinPressure and MinS1 <= @EntryMinS1 THEN pKey END AS possibleEntry 
     ,CASE WHEN [Pressure] >= @MinPressure and MaxT1 >= @ExitMaxT1 THEN pKey END AS possibleExit
   FROM dbo.tblTestIntrumentData
   where year([TimeSlice]) >= @YearFrom
 )
 , cte3 as
(
    -- running maximum over all rows up to AND including the current row
    select * 
    ,Max(possibleEntry) -- most recent possibleEntry
      Over (PARTITION BY [IntrumentId], [TestName]
            ORDER BY pKey
            ROWS Unbounded Preceding) AS lastEntry 

    from cte2
)
, cte4 as
(
    -- running maximum over the rows strictly BEFORE the current row
    -- (the frame ends at 1 Preceding, so a row's own possibleExit is not counted for itself)
    select *
    ,Max(possibleExit) -- most recent possibleExit
      Over (PARTITION BY [IntrumentId], [TestName]
            ORDER BY pKey
            ROWS BETWEEN Unbounded Preceding AND 1 Preceding) AS lastExit
    from cte3
)
,groupRows AS 
( -- mark rows from the 1st entry to the exit row
  -- only candidate rows (entry or exit) that already have a preceding/current entry are kept
SELECT *
    -- if lastEntry <= lastExit we're after an exit and before an entry -> don't return this row
    -- (when lastExit is NULL the comparison is UNKNOWN, so the ELSE branch applies)
    -- NOTE(review): a row that is both possibleEntry and possibleExit makes lastExit equal to
    -- lastEntry for the rows that follow it, so `<=` closes the group immediately -- this looks
    -- like the source of the same-pKey entry/exit described in the question; confirm.
    ,CASE WHEN lastEntry <= lastExit THEN 0 ELSE 1 END AS returnFlag
    -- assign the same group number to consecutive rows in group 
    -- (running count of the "after an exit" rows seen so far, including the current row)
    ,Sum(CASE WHEN lastEntry <= lastExit THEN 1 ELSE 0 END)
    Over (PARTITION BY [IntrumentId], [TestName]
        ORDER BY pKey
        ROWS Unbounded Preceding) AS grp
FROM cte4
WHERE (possibleEntry IS NOT NULL OR possibleExit IS NOT NULL)
    AND lastEntry IS NOT NULL
)
,rowNum AS
 ( -- get the data from the first and last row of an entry/exit group
   SELECT *
     -- to get the values of the 1st row in a group
     ,Row_Number() Over (PARTITION BY [IntrumentId], [TestName], grp ORDER BY pKey) AS rn
     -- to get the values of the last row in a group
     -- (despite the name this is possibleExit, i.e. a pKey or NULL, of the group's last row)
     ,Last_Value(possibleExit)
      Over (PARTITION BY [IntrumentId], [TestName], grp
            ORDER BY pKey
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitTimestamp
     -- returnFlag is always 1 here because of the WHERE below, so this partition matches the
     -- one above; the CASE yields pKey only when possibleExit is set, which appears to make
     -- ExitPKey the same value as ExitTimestamp
     ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN PKey END)
      Over (PARTITION BY [IntrumentId], [TestName], returnFlag, grp
            ORDER BY pKey
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPKey
   FROM groupRows
   WHERE returnFlag = 1
 )
-- one output row per group: the group's first row (rn = 1) for the instrument/test under test
select * from rowNum
where IntrumentId = 'N-12892'
and TestName = 'T451'
and rn = 1
ORDER BY IntrumentId, TestName, PKey

使用架构和数据创建表:

-- Schema of the sample table read by the queries in this post (SQL Server script).
-- The "Intrument" spelling is part of the table/column names and is what the queries reference.
USE [dbTestData]
GO
/****** Object:  Table [dbo].[tblTestIntrumentData]    Script Date: 10/04/2019 10:13:20 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- One row per time slice of an instrument/test; the T1/S1 columns hold the Avg/Min/Max
-- summary values, all nullable.
CREATE TABLE [dbo].[tblTestIntrumentData](
    [TimeSlice] [datetime2](7) NULL,
    [IntrumentId] [varchar](7) NOT NULL,
    [TestName] [varchar](4) NOT NULL,
    [AvgT1] [float] NULL,
    [MinT1] [float] NULL,
    [MaxT1] [float] NULL,
    [AvgS1] [float] NULL,
    [MinS1] [float] NULL,
    [MaxS1] [float] NULL,
    [MaxT2] [float] NULL,
    [MaxS2] [float] NULL,
    [MinB1] [float] NULL,
    [Pressure] [float] NULL,
    -- pKey is the ordering key used by the queries; it is declared nullable and
    -- no primary key or unique constraint is defined in this script
    [pKey] [bigint] NULL
) ON [PRIMARY]
GO
-- Sample rows for instrument N-12892 / test T451 (2018-11-03 and 2018-11-04).
-- The rows are NOT inserted in pKey order, so any query relying on sequence must order by pKey.
-- The row with pKey 3571283 has NULL in all T1/S1 columns.
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T09:55:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.005, 0.004, 0.004, 0.005, 1.116, 2.36, 0.003, 13025.385, 3550724)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:00:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.005, 0.005, 0.005, 0.005, 0.005, 0.006, 2.629, 0.438, 0.001, 15149.751, 3571982)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:25:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.006, 0.006, 0.006, 0.006, 0.006, 0.007, 0.209, 2.718, 0.017, 13562.116, 3555221)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:30:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.006, 0.006, 0.006, 0.006, 0.006, 0.006, 0.487, 3.223, 0.002, 13607.694, 3555878)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:35:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.003, 0.004, 0.004, 0.003, 0.004, 5.202, 3.001, 0.001, 16065.146, 3577673)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:40:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.003, 0.004, 0.004, 0.004, 0.004, 4.482, 2.902, 0.001, 16350.153, 3578519)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:10:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.007, 0.007, 0.007, 0.007, 0.007, 0.007, 0.962, 0.275, 0.005, 13295.4, 3553139)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:45:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.005, 0.005, 0.005, 0.005, 0.005, 0.006, 4.498, 14.667, 0.004, 13854.237, 3558230)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:20:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.005, 5.426, 9.98, 0.003, 15762.039, 3575126)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:05:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.007, 0.007, 0.008, 0.008, 0.008, 0.008, 1.007, 0.524, 0.001, 13273.765, 3552443)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:15:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.007, 0.006, 0.007, 0.007, 0.007, 0.007, 3.585, 0.725, 0.003, 13378.195, 3553832)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:50:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.005, 5.779, 8.781, 0.003, 13900.076, 3559070)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T11:00:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.004, 2.537, 2.045, 0.088, 14061.833, 3560585)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:20:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.007, 0.006, 0.007, 0.007, 0.007, 0.007, 3.608, 3.609, 0.023, 13490.572, 3554492)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:55:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.004, 1.39, 1.569, 0.001, 13951.427, 3559895)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:10:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 1.141, 3.528, 0.002, 15257.154, 3573476)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:35:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.006, 0.005, 0.006, 0.006, 0.006, 0.006, 0.352, 0.898, 0.002, 13649.856, 3556601)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:40:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.005, 0.005, 0.005, 0.006, 0.005, 0.006, 4.496, 0.835, 0.001, 13824.016, 3557420)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:30:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.004, 21.651, 2.161, 0.005, 15963.565, 3576818)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-03T10:00:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.007, 0.005, 0.007, 0.007, 0.007, 0.007, 1.257, 0.301, 0.001, 13226.338, 3551615)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T03:55:26.7680000' AS DateTime2), N'N-12892', N'T451', NULL, NULL, NULL, NULL, NULL, NULL, 0.018, 0.009, 0.018, 14788.663, 3571283)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:05:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 2.333, 4.672, 0.001, 15172.775, 3572744)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:15:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.005, 0.005, 0.004, 0.005, 4.181, 2.161, 0.018, 15366.178, 3574277)
INSERT [dbo].[tblTestIntrumentData] ([TimeSlice], [IntrumentId], [TestName], [AvgT1], [MinT1], [MaxT1], [AvgS1], [MinS1], [MaxS1], [MaxT2], [MaxS2], [MinB1], [Pressure], [pKey]) VALUES (CAST(N'2018-11-04T04:25:26.7680000' AS DateTime2), N'N-12892', N'T451', 0.004, 0.004, 0.004, 0.004, 0.004, 0.004, 3.549, 3.854, 0.003, 15838.353, 3575948)

1 个答案:

答案 0 :(得分:1)

我认为这还可以再简化一些,但我一时跳不出当前解决方案的思路,找不到真正的简化方法。无论如何:

-- Answer query: returns one row per entry/exit set, i.e. every row flagged as a
-- "starter", together with findExit (the pKey of the first later exit, or NULL).
DECLARE @EntryMinS1  float = 0.00418848167539267;  -- entry test: MinS1 <= this value
DECLARE @ExitMaxT1   float = 0.00429319371727749;  -- exit test:  MaxT1 >= this value
DECLARE @MinPressure float = 209.424083769634;     -- both tests require Pressure >= this value
DECLARE @YearFrom    int   = 2017;                 -- keep rows with YEAR(TimeSlice) >= this value

WITH candidates AS
(
    -- Tag each row: possibleEntry / possibleExit hold the row's own pKey when the
    -- corresponding test passes, otherwise NULL.
    SELECT
        src.*,
        CASE
            WHEN src.[Pressure] >= @MinPressure AND src.MinS1 <= @EntryMinS1 THEN src.pKey
        END AS possibleEntry,
        CASE
            WHEN src.[Pressure] >= @MinPressure AND src.MaxT1 >= @ExitMaxT1 THEN src.pKey
        END AS possibleExit
    FROM dbo.tblTestIntrumentData AS src
    WHERE YEAR(src.[TimeSlice]) >= @YearFrom
),
withNextExit AS
(
    -- findExit = smallest pKey among rows of the same instrument/test whose
    -- possibleExit lies strictly after this row's possibleEntry.
    -- It is NULL when the row is not a possible entry or no such exit exists.
    -- The aggregate has no GROUP BY, so the APPLY always yields exactly one row.
    SELECT
        entrance.*,
        nextExit.findExit
    FROM candidates AS entrance
    OUTER APPLY
    (
        SELECT MIN(later.pKey) AS findExit
        FROM candidates AS later
        WHERE later.possibleExit > entrance.possibleEntry
          AND later.IntrumentId  = entrance.IntrumentId
          AND later.TestName     = entrance.TestName
    ) AS nextExit
),
flagged AS
(
    -- starter = 0 when the row is not a possible entry, or shares its findExit with
    -- the previous row (by pKey), or has neither its own possibleExit nor a findExit.
    -- Any other outcome, including an UNKNOWN comparison against NULL, gives starter = 1.
    SELECT
        w.*,
        CASE
            WHEN w.possibleEntry IS NULL
              OR w.findExit = LAG(w.findExit, 1, 0)
                                  OVER (PARTITION BY w.IntrumentId, w.TestName ORDER BY w.pKey)
              OR COALESCE(w.possibleExit, w.findExit) IS NULL
            THEN 0
            ELSE 1
        END AS starter
    FROM withNextExit AS w
)
SELECT *
FROM flagged
WHERE starter = 1

possibleEntry 和 findExit 共同定义每个集合。 Here's a working example on dbfiddle

在 cteFindExit 中,我为每个可能的入口查找在它之后出现的第一个可能出口;对于每组的起始行来说,这就是该组的出口。然后在 cteFindEntry 中,我会丢弃所有实际上不是起始入口的行,即满足以下任一条件的行:

  • 这条记录根本不是可能的入口
  • 或者,它的出口(来自 cteFindExit 的 findExit)与前一条记录的出口相同(这适用于屏幕截图中的第二组)
  • 或者,它完全没有出口:之后找不到出口(findExit 为 NULL),且自身也不是可能的出口(possibleExit 为 NULL)。因此在出口确实为 NULL 的第三组中,被找到的是该组中自身设置了 possibleExit 的第一条记录