在SQL中实际上可以进行以下操作吗?
我有一些时间序列数据,想根据价格从中提取若干入口点(entry)和出口点(exit)。
所需的输出:
示例数据:
SQL数据:
-- Sample table of timestamped price observations per [Name].
-- The queries in this file compare [Price1] with the entry threshold and
-- [Price2] with the exit threshold.
CREATE TABLE Control
(
    [PKey]      int,          -- row identifier used to report entry/exit rows
    [TimeStamp] datetime,     -- observation time
    [Name]      varchar(10),  -- series name; each series is evaluated separately
    [Price1]    float,
    [Price2]    float
);
-- Sample data: two series, 'Name1' (PKey 1-23) and 'Name2' (PKey 24-46),
-- each sampled every 10 minutes from 09:00 to 12:40 on 2018-10-01.
-- In every sample row Price2 equals Price1 + 5.
INSERT INTO Control ([PKey], [TimeStamp], [Name], [Price1], [Price2])
VALUES
(1,'2018-10-01 09:00:00', 'Name1',120, 125),
(2,'2018-10-01 09:10:00', 'Name1',110, 115),
(3,'2018-10-01 09:20:00', 'Name1',101, 106),
(4,'2018-10-01 09:30:00', 'Name1',105, 110),
(5,'2018-10-01 09:40:00', 'Name1',106, 111),
(6,'2018-10-01 09:50:00', 'Name1',108, 113),
(7,'2018-10-01 10:00:00', 'Name1',110, 115),
(8,'2018-10-01 10:10:00', 'Name1',104, 109),
(9,'2018-10-01 10:20:00', 'Name1',101, 106),
(10,'2018-10-01 10:30:00', 'Name1',99, 104),
(11,'2018-10-01 10:40:00', 'Name1',95, 100),
(12,'2018-10-01 10:50:00', 'Name1',101, 106),
(13,'2018-10-01 11:00:00', 'Name1',102, 107),
(14,'2018-10-01 11:10:00', 'Name1',101, 106),
(15,'2018-10-01 11:20:00', 'Name1',99, 104),
(16,'2018-10-01 11:30:00', 'Name1',105, 110),
(17,'2018-10-01 11:40:00', 'Name1',108, 113),
(18,'2018-10-01 11:50:00', 'Name1',108, 113),
(19,'2018-10-01 12:00:00', 'Name1',109, 114),
(20,'2018-10-01 12:10:00', 'Name1',108, 113),
(21,'2018-10-01 12:20:00', 'Name1',105, 110),
(22,'2018-10-01 12:30:00', 'Name1',101, 106),
(23,'2018-10-01 12:40:00', 'Name1',102, 107),
(24,'2018-10-01 09:00:00', 'Name2',103, 108),
(25,'2018-10-01 09:10:00', 'Name2',101, 106),
(26,'2018-10-01 09:20:00', 'Name2',104, 109),
(27,'2018-10-01 09:30:00', 'Name2',106, 111),
(28,'2018-10-01 09:40:00', 'Name2',108, 113),
(29,'2018-10-01 09:50:00', 'Name2',108, 113),
(30,'2018-10-01 10:00:00', 'Name2',105, 110),
(31,'2018-10-01 10:10:00', 'Name2',103, 108),
(32,'2018-10-01 10:20:00', 'Name2',101, 106),
(33,'2018-10-01 10:30:00', 'Name2',99, 104),
(34,'2018-10-01 10:40:00', 'Name2',101, 106),
(35,'2018-10-01 10:50:00', 'Name2',104, 109),
(36,'2018-10-01 11:00:00', 'Name2',101, 106),
(37,'2018-10-01 11:10:00', 'Name2',99, 104),
(38,'2018-10-01 11:20:00', 'Name2',106, 111),
(39,'2018-10-01 11:30:00', 'Name2',103, 108),
(40,'2018-10-01 11:40:00', 'Name2',105, 110),
(41,'2018-10-01 11:50:00', 'Name2',108, 113),
(42,'2018-10-01 12:00:00', 'Name2',105, 110),
(43,'2018-10-01 12:10:00', 'Name2',104, 109),
(44,'2018-10-01 12:20:00', 'Name2',108, 113),
(45,'2018-10-01 12:30:00', 'Name2',110, 115),
(46,'2018-10-01 12:40:00', 'Name2',105, 110)
;
我尝试了什么:
我可以使用以下查询来获得入口和出口点的第一个实例,该查询先找到第一个入口点PKey,然后在入口点PKey之后找到第一个出口点
-- Thresholds for the entry/exit rules.
DECLARE @EntryPrice1 float = 101.0; -- Entry when Price1 <= 101.0 (when not already Entered)
DECLARE @ExitPrice2 float = 113.0;  -- Exit when Price2 >= 113.0 (after Entry only)

-- Returns only the FIRST entry per [Name] and the first exit that follows it.
WITH first_entry AS
(
    -- Smallest PKey per name whose Price1 is at or below the entry threshold.
    SELECT
        [Name],
        MIN(CASE WHEN [Price1] <= @EntryPrice1 THEN [PKey] END) AS [Entry PKey]
    FROM [dbo].[Control]
    GROUP BY [Name]
)
SELECT
    c.[Name],
    fe.[Entry PKey],
    -- Smallest PKey after the entry row whose Price2 reaches the exit threshold.
    MIN(CASE
            WHEN c.[PKey] > fe.[Entry PKey] AND c.[Price2] >= @ExitPrice2
            THEN c.[PKey]
        END) AS [Exit PKey]
FROM [dbo].[Control] AS c
LEFT JOIN first_entry AS fe
    ON fe.[Name] = c.[Name]
GROUP BY
    c.[Name],
    fe.[Entry PKey];
-- Result for the sample data:
-- Name   Entry PKey  Exit PKey
-- Name1  3           6
-- Name2  25          28
我卡在了如何返回多个入口/出口点这一步,并且不确定这在SQL中是否可行。
入口/出口点的判定逻辑是:
入口 - 当 Price1 <= 101.0,并且当前不处于某个尚未退出的入口之中。
出口 - 当 Price2 >= 113.0,并且当前处于某个入口之内。
答案 0 :(得分:2)
这是一种间隙和孤岛问题,这是使用Windowed Aggregates的通用解决方案(适用于大多数DBMS):
DECLARE @EntryPrice1 float = 101.0; -- Entry when Price1 <= 101.0 (when not already Entered)
DECLARE @ExitPrice2 float = 113.0;  -- Exit when Price2 >= 113.0 (after Entry only)

-- Returns one row per entry/exit pair and per Name: the first qualifying entry
-- row of each "island" plus the data of the exit row that closes it.
-- An island that never exits yields NULL in all three Exit* columns.
WITH cte AS
 ( -- Mark potential entry and exit rows and carry the latest candidates forward.
   SELECT *
      ,CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END AS possibleEntry
      ,CASE WHEN Price2 >= @ExitPrice2 THEN Timestamp END AS possibleExit
      -- Most recent possibleEntry up to and including the current row.
      ,Max(CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END)
       Over (PARTITION BY Name
             ORDER BY Timestamp
             ROWS Unbounded Preceding) AS lastEntry
      -- Most recent possibleExit strictly BEFORE the current row. Excluding the
      -- current row keeps the exit row that closes an island inside that island.
      ,Max(CASE WHEN Price2 >= @ExitPrice2 THEN Timestamp END)
       Over (PARTITION BY Name
             ORDER BY Timestamp
             ROWS BETWEEN Unbounded Preceding AND 1 Preceding) AS lastExit
   FROM [dbo].[Control]
 )
-- Debugging aid: SELECT * FROM cte ORDER BY Name, PKey
,groupRows AS
 ( -- Keep only candidate rows and split them into islands.
   SELECT *
      -- lastEntry <= lastExit means the row lies after an exit and before the
      -- next entry, so it must not be returned. When lastExit is NULL (no exit
      -- seen yet) the comparison is UNKNOWN and the row is kept.
      ,CASE WHEN lastEntry <= lastExit THEN 0 ELSE 1 END AS returnFlag
      -- Running count of rejected rows: all kept rows between two rejected
      -- rows share the same group number.
      ,Sum(CASE WHEN lastEntry <= lastExit THEN 1 ELSE 0 END)
       Over (PARTITION BY Name
             ORDER BY Timestamp
             ROWS Unbounded Preceding) AS grp
   FROM cte
   WHERE (possibleEntry IS NOT NULL OR possibleExit IS NOT NULL)
     AND lastEntry IS NOT NULL -- drop exit candidates that precede the first entry
 )
-- Debugging aid: SELECT * FROM groupRows ORDER BY Name, PKey
,rowNum AS
 ( -- Attach the exit data (taken from the last row of the island) to every row.
   SELECT *
      -- rn = 1 identifies the first (entry) row of an island.
      ,Row_Number() Over (PARTITION BY Name, grp ORDER BY Timestamp) AS rn
      -- The last row of an island is its exit row, unless the island never
      -- exits; in that case the last row is an entry row. All three Exit*
      -- values are therefore taken only from exit rows, so an open island
      -- reports NULL instead of the entry row's own Price2.
      ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN Price2 END)
       Over (PARTITION BY Name, grp
             ORDER BY Timestamp
             ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPrice
      ,Last_Value(possibleExit)
       Over (PARTITION BY Name, grp
             ORDER BY Timestamp
             ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitTimestamp
      ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN PKey END)
       Over (PARTITION BY Name, grp
             ORDER BY Timestamp
             ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPKey
   FROM groupRows
   WHERE returnFlag = 1
 )
SELECT Name
   ,Price1 AS EntryPrice
   ,ExitPrice
   ,Timestamp AS EntryTimestamp
   ,ExitTimestamp
   ,PKey AS EntryPKey
   ,ExitPKey
FROM rowNum
WHERE rn = 1 -- return the first row of each island
ORDER BY Name, Timestamp;
请参见dbfiddle
当然可以简化逻辑或应用某些专有的SQL Server语法...
答案 1 :(得分:0)
这是间隙与孤岛(gaps-and-islands)问题的一种特殊形式。先从进入和退出的基本定义开始:
-- Flag each row as a candidate entry and/or candidate exit.
-- @EntryPrice1 and @ExitPrice2 must be declared earlier in the same batch.
SELECT
    c.*,
    CASE WHEN c.[Price1] <= @EntryPrice1 THEN 1 ELSE 0 END AS is_entry,
    CASE WHEN c.[Price2] >= @ExitPrice2 THEN 1 ELSE 0 END AS is_exit
FROM control AS c;
这还不完全可行,因为两个相邻的“入口”只应算作一个入口。通过查看上一次进入/退出的位置,我们可以获得所需的信息。借助这一逻辑,我们可以确定哪些入口是“真正的”入口。我们还可以顺便得到下一次退出(exit)的位置:
-- Adds, for every row, the previous entry, the latest exit and the next exit
-- (all expressed as PKey values) within the same name.
-- @EntryPrice1 and @ExitPrice2 must be declared earlier in the same batch.
with cee as (
      -- candidate entry / exit flags per row
      select c.*,
             (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
             (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
      from control c
     ),
     cp as (
      select cee.*,
             -- largest entry PKey strictly before the current row
             max(case when is_entry = 1 then pkey end) over (partition by name order by timestamp rows between unbounded preceding and 1 preceding) as prev_entry,
             -- largest exit PKey up to and including the current row (default window frame)
             max(case when is_exit = 1 then pkey end) over (partition by name order by timestamp) as prev_exit,
             -- smallest exit PKey from the current row onwards (window runs in descending time order)
             min(case when is_exit = 1 then pkey end) over (partition by name order by timestamp desc) as next_exit
      from cee
     )
-- A WITH clause must be followed by a statement; show the intermediate result.
select cp.*
from cp
order by cp.name, cp.timestamp;
接下来,使用此逻辑生成实际条目的累积总和,然后执行一些花式过滤:
-- Returns the rows where a "real" entry starts: a candidate entry that is
-- either the first one for its name or is preceded by an exit that happened
-- after the previous candidate entry. next_exit holds the PKey of the exit
-- that follows (NULL when there is none).
-- @EntryPrice1 and @ExitPrice2 must be declared earlier in the same batch.
with cee as (
      -- candidate entry / exit flags per row
      select c.*,
             (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
             -- exits are defined on Price2 against @ExitPrice2, as in the earlier steps
             (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
      from control c
     ),
     cp as (
      select cee.*,
             -- largest entry PKey strictly before the current row
             max(case when is_entry = 1 then pkey end) over (partition by name order by timestamp rows between unbounded preceding and 1 preceding) as prev_entry,
             -- largest exit PKey up to and including the current row (default window frame)
             max(case when is_exit = 1 then pkey end) over (partition by name order by timestamp) as prev_exit,
             -- smallest exit PKey from the current row onwards (window runs in descending time order)
             min(case when is_exit = 1 then pkey end) over (partition by name order by timestamp desc) as next_exit
      from cee
     )
select *
from cp
where cp.is_entry = 1 and
      -- NOTE(review): comparing PKey values assumes PKey increases with
      -- timestamp within a name, as it does in the sample data; confirm for real data.
      (prev_entry is null or prev_exit > prev_entry)
order by cp.name, cp.timestamp;
这为您提供了条目开始的行。您可以加入以获得所需的其他信息。