时间序列图数据的入口点和出口点

时间:2018-10-01 14:46:35

标签: sql sql-server tsql sql-server-2016

在SQL中实际上可以进行以下操作吗?

我有一些时间序列数据,我想根据价格从中提取若干入口点和出口点。

所需的输出:

enter image description here

示例数据:

enter image description here

SQL数据:

-- Sample fixture used by every query in this post.
-- One row per ([Name], [TimeStamp]) reading; [Price1] is the column tested against the
-- entry threshold and [Price2] the column tested against the exit threshold.
-- No keys or NOT NULL constraints are declared on this table.
CREATE TABLE Control
    ([PKey] int, [TimeStamp] datetime, [Name] varchar(10), [Price1] float, [Price2] float);

-- 46 readings on 2018-10-01, one every 10 minutes from 09:00 to 12:40:
--   PKey  1-23 -> 'Name1'
--   PKey 24-46 -> 'Name2'
-- Within each Name, PKey increases together with TimeStamp, and in every row
-- Price2 = Price1 + 5.
INSERT INTO Control ([PKey], [TimeStamp], [Name], [Price1], [Price2])
VALUES
    (1,'2018-10-01 09:00:00', 'Name1',120, 125),
    (2,'2018-10-01 09:10:00', 'Name1',110, 115),
    (3,'2018-10-01 09:20:00', 'Name1',101, 106),
    (4,'2018-10-01 09:30:00', 'Name1',105, 110),
    (5,'2018-10-01 09:40:00', 'Name1',106, 111),
    (6,'2018-10-01 09:50:00', 'Name1',108, 113),
    (7,'2018-10-01 10:00:00', 'Name1',110, 115),
    (8,'2018-10-01 10:10:00', 'Name1',104, 109),
    (9,'2018-10-01 10:20:00', 'Name1',101, 106),
    (10,'2018-10-01 10:30:00', 'Name1',99, 104),
    (11,'2018-10-01 10:40:00', 'Name1',95, 100),
    (12,'2018-10-01 10:50:00', 'Name1',101, 106),
    (13,'2018-10-01 11:00:00', 'Name1',102, 107),
    (14,'2018-10-01 11:10:00', 'Name1',101, 106),
    (15,'2018-10-01 11:20:00', 'Name1',99, 104),
    (16,'2018-10-01 11:30:00', 'Name1',105, 110),
    (17,'2018-10-01 11:40:00', 'Name1',108, 113),
    (18,'2018-10-01 11:50:00', 'Name1',108, 113),
    (19,'2018-10-01 12:00:00', 'Name1',109, 114),
    (20,'2018-10-01 12:10:00', 'Name1',108, 113),
    (21,'2018-10-01 12:20:00', 'Name1',105, 110),
    (22,'2018-10-01 12:30:00', 'Name1',101, 106),
    (23,'2018-10-01 12:40:00', 'Name1',102, 107),
    (24,'2018-10-01 09:00:00', 'Name2',103, 108),
    (25,'2018-10-01 09:10:00', 'Name2',101, 106),
    (26,'2018-10-01 09:20:00', 'Name2',104, 109),
    (27,'2018-10-01 09:30:00', 'Name2',106, 111),
    (28,'2018-10-01 09:40:00', 'Name2',108, 113),
    (29,'2018-10-01 09:50:00', 'Name2',108, 113),
    (30,'2018-10-01 10:00:00', 'Name2',105, 110),
    (31,'2018-10-01 10:10:00', 'Name2',103, 108),
    (32,'2018-10-01 10:20:00', 'Name2',101, 106),
    (33,'2018-10-01 10:30:00', 'Name2',99, 104),
    (34,'2018-10-01 10:40:00', 'Name2',101, 106),
    (35,'2018-10-01 10:50:00', 'Name2',104, 109),
    (36,'2018-10-01 11:00:00', 'Name2',101, 106),
    (37,'2018-10-01 11:10:00', 'Name2',99, 104),
    (38,'2018-10-01 11:20:00', 'Name2',106, 111),
    (39,'2018-10-01 11:30:00', 'Name2',103, 108),
    (40,'2018-10-01 11:40:00', 'Name2',105, 110),
    (41,'2018-10-01 11:50:00', 'Name2',108, 113),
    (42,'2018-10-01 12:00:00', 'Name2',105, 110),
    (43,'2018-10-01 12:10:00', 'Name2',104, 109),
    (44,'2018-10-01 12:20:00', 'Name2',108, 113),
    (45,'2018-10-01 12:30:00', 'Name2',110, 115),
    (46,'2018-10-01 12:40:00', 'Name2',105, 110)
;

我尝试了什么:

我可以使用以下查询来获得入口和出口点的第一个实例,该查询先找到第一个入口点PKey,然后在入口点PKey之后找到第一个出口点

DECLARE @EntryPrice1 float = 101.0; -- enter once Price1 is at or below 101.0 (only while not already entered)
DECLARE @ExitPrice2 float = 113.0;  -- leave once Price2 is at or above 113.0 (only after an entry)

-- Returns only the FIRST entry/exit pair per Name; later cycles are not reported.
WITH first_entry AS
(
    -- lowest PKey per Name whose Price1 meets the entry threshold (NULL if none does)
    SELECT
        [Name],
        MIN(CASE WHEN [Price1] <= @EntryPrice1 THEN [PKey] END) AS [Entry PKey]
    FROM [dbo].[Control]
    GROUP BY [Name]
)
SELECT
    c.[Name],
    fe.[Entry PKey],
    -- lowest PKey after the entry row whose Price2 meets the exit threshold
    MIN(CASE
            WHEN c.[PKey] > fe.[Entry PKey] AND c.[Price2] >= @ExitPrice2 THEN c.[PKey]
        END) AS [Exit PKey]
FROM [dbo].[Control] AS c
LEFT JOIN first_entry AS fe
    ON fe.[Name] = c.[Name]
GROUP BY
    c.[Name],
    fe.[Entry PKey];

--Name  Entry PKey  Exit PKey
--Name1     3       6
--Name2     25      28

enter image description here

我卡在了如何返回多个入口/出口点这一步,并且不确定这在SQL中是否可行。

入口点和出口点的判定逻辑是:

入口:价格1 <= 101.0,并且当前不处于某个尚未退出的入口之中。

出口:价格2 >= 113.0,并且当前处于某个入口之中。

2 个答案:

答案 0 :(得分:2)

这是一种间隙和孤岛(gaps-and-islands)问题,下面是使用窗口聚合函数(Windowed Aggregates)的通用解决方案(适用于大多数DBMS):

declare @EntryPrice1 float = 101.0; -- Entry when Price1 <= 101.0 (when not already Entered)
declare @ExitPrice2 float = 113.0; -- Exit when Price2 >= 113.0 (after Entry only)

-- Returns one row per entry/exit cycle and Name:
--   EntryPrice / EntryTimestamp / EntryPKey come from the first entry row of the cycle,
--   ExitPrice / ExitTimestamp / ExitPKey come from the exit row that closes it
--   (all three are NULL when the cycle is still open at the end of the data).
-- NOTE(review): a row that satisfies BOTH thresholds and opens a cycle is reported as
-- entry and exit of that cycle at the same time; the sample data contains no such row.
WITH cte AS 
 ( -- apply your logic to mark potential entry and exit rows
   SELECT *
     ,CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END AS possibleEntry 
     ,CASE WHEN Price2 >= @ExitPrice2  THEN Timestamp END AS possibleExit
     ,Max(CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END) -- most recent possibleEntry (current row included)
      Over (PARTITION BY Name
            ORDER BY Timestamp
            ROWS Unbounded Preceding) AS lastEntry 
     ,Max(CASE WHEN Price2 >= @ExitPrice2 THEN Timestamp END) -- most recent possibleExit (current row excluded)
      Over (PARTITION BY Name
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND 1 Preceding) AS lastExit
   FROM [dbo].[Control] 
 )
-- SELECT * FROM cte ORDER BY Name, PKey
,groupRows AS 
 ( -- keep only signal rows and assign them to entry/exit groups
   SELECT *
     -- if lastEntry <= lastExit we're after an exit and before an entry -> don't return this row
     ,CASE WHEN lastEntry <= lastExit THEN 0 ELSE 1 END AS returnFlag
     -- group number = number of exit rows strictly before this row.
     -- Every exit row closes the current group, so the rows that follow it get a new number
     -- even when a single exit is directly followed by the next entry. (Counting only the
     -- discarded rows, as before, merged such back-to-back cycles into one group.)
     ,Count(possibleExit)
      Over (PARTITION BY Name
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND 1 Preceding) AS grp
   FROM cte
   WHERE (possibleEntry IS NOT NULL OR possibleExit IS NOT NULL)
     AND lastEntry IS NOT NULL -- drop exit signals that occur before the very first entry
 )
-- SELECT * FROM groupRows ORDER BY  Name, PKey
,rowNum AS
 ( -- get the data from the first and last row of an entry/exit group
   SELECT *
     -- to get the values of the 1st row in a group
     ,Row_Number() Over (PARTITION BY Name, grp ORDER BY Timestamp) AS rn
     -- to get the values of the last row in a group; each expression is NULL unless that
     -- last row is an exit row, so an open group reports no exit data at all
     ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN Price2 END)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPrice
     ,Last_Value(possibleExit)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitTimestamp
     ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN PKey END)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPKey
   FROM groupRows
   WHERE returnFlag = 1 -- discard exit signals that occur while not entered
 )
SELECT Name
  ,Price1 AS EntryPrice
  ,ExitPrice
  ,Timestamp AS EntryTimestamp
  ,ExitTimestamp
  ,PKey AS EntryPKey
  ,ExitPKey
FROM rowNum
WHERE rn = 1 -- return 1st row of each group
ORDER BY Name, Timestamp

请参见dbfiddle

当然可以简化逻辑或应用某些专有的SQL Server语法...

答案 1 :(得分:0)

这是间隙与孤岛(gaps-and-islands)问题的一种特殊形式。从入口和出口的基本定义开始:

  -- Tag every row with 0/1 flags for the raw entry and exit conditions.
  SELECT
      c.*,
      CASE WHEN c.[Price1] <= @EntryPrice1 THEN 1 ELSE 0 END AS is_entry,
      CASE WHEN c.[Price2] >= @ExitPrice2 THEN 1 ELSE 0 END AS is_exit
  FROM control AS c;

仅凭这些标记还不够,因为两个相邻的“入口”只应算作一个入口。通过查看上一次入口/出口的位置,我们可以获得所需的信息,并据此确定哪些入口是“真实的”。我们还可以顺便得到下一次出口的位置:

-- CTE definitions only: a final SELECT reading from cp has to follow.
WITH cee AS
(
    -- raw 0/1 entry and exit signals for every row
    SELECT
        c.*,
        CASE WHEN c.[Price1] <= @EntryPrice1 THEN 1 ELSE 0 END AS is_entry,
        CASE WHEN c.[Price2] >= @ExitPrice2 THEN 1 ELSE 0 END AS is_exit
    FROM control AS c
),
cp AS
(
    SELECT
        cee.*,
        -- highest PKey among entry signals on the rows before this one
        MAX(CASE WHEN is_entry = 1 THEN pkey END)
            OVER (PARTITION BY name
                  ORDER BY timestamp
                  ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prev_entry,
        -- highest PKey among exit signals up to and including this row (default frame)
        MAX(CASE WHEN is_exit = 1 THEN pkey END)
            OVER (PARTITION BY name
                  ORDER BY timestamp) AS prev_exit,
        -- lowest PKey among exit signals from this row onwards (descending order, default frame)
        MIN(CASE WHEN is_exit = 1 THEN pkey END)
            OVER (PARTITION BY name
                  ORDER BY timestamp DESC) AS next_exit
    FROM cee
)

接下来,使用此逻辑生成实际条目的累积总和,然后执行一些花式过滤:

-- Returns the rows on which a "real" entry starts, i.e. an entry signal that is not
-- preceded by another entry signal that has not been exited yet.
-- next_exit on such a row is the PKey of the exit signal that follows it (NULL if none).
-- Expects @EntryPrice1 and @ExitPrice2 to be declared as in the question.
with cee as (
      -- raw 0/1 entry and exit signals for every row
      select c.*,
             (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
             -- exit is driven by Price2 against @ExitPrice2 (was Price1 / undeclared @ExitPrice1)
             (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
      from control c
     ),
     cp as (
      select cee.*,
             -- highest PKey among entry signals on the rows before this one
             max(case when is_entry = 1 then pkey end) over (partition by name order by timestamp rows between unbounded preceding and 1 preceding) as prev_entry,
             -- highest PKey among exit signals up to and including this row
             max(case when is_exit = 1 then pkey end) over (partition by name order by timestamp) as prev_exit,
             -- lowest PKey among exit signals from this row onwards
             min(case when is_exit = 1 then pkey end) over (partition by name order by timestamp desc) as next_exit
      from cee
     )
select *
from cp
where cp.is_entry = 1 and
      -- first entry signal for the name, or the latest exit came after the latest earlier entry.
      -- The comparison is on PKey values, so it relies on PKey increasing with timestamp
      -- within a name (true for the sample data).
      (prev_entry is null or prev_exit > prev_entry)
这会返回每个入口开始的行。您可以再通过联接(join)获取所需的其他信息。