我正在处理一些当前以1分钟为间隔存储的数据,如下所示:
-- Sample fixture: ten consecutive 1-minute OHLC bars in a local temp table.
CREATE TABLE #MinuteData
(
    [Id]        INT,
    [MinuteBar] DATETIME,          -- timestamp identifying the 1-minute bar
    [Open]      NUMERIC(12, 6),
    [High]      NUMERIC(12, 6),
    [Low]       NUMERIC(12, 6),
    [Close]     NUMERIC(12, 6)
);

INSERT INTO #MinuteData
    ([Id], [MinuteBar], [Open], [High], [Low], [Close])
VALUES
    (1,  '2015-01-01 17:00:00', 1.557870, 1.557880, 1.557870, 1.557880),
    (2,  '2015-01-01 17:01:00', 1.557900, 1.557900, 1.557880, 1.557880),
    (3,  '2015-01-01 17:02:00', 1.557960, 1.558070, 1.557960, 1.558040),
    (4,  '2015-01-01 17:03:00', 1.558080, 1.558100, 1.558040, 1.558050),
    (5,  '2015-01-01 17:04:00', 1.558050, 1.558100, 1.558020, 1.558030),
    (6,  '2015-01-01 17:05:00', 1.558580, 1.558710, 1.557870, 1.557950),
    (7,  '2015-01-01 17:06:00', 1.557910, 1.558120, 1.557910, 1.557990),
    (8,  '2015-01-01 17:07:00', 1.557940, 1.558250, 1.557940, 1.558170),
    (9,  '2015-01-01 17:08:00', 1.558140, 1.558200, 1.558080, 1.558120),
    (10, '2015-01-01 17:09:00', 1.558110, 1.558140, 1.557970, 1.557970);

-- Show the raw 1-minute rows.
SELECT
    [Id],
    [MinuteBar],
    [Open],
    [High],
    [Low],
    [Close]
FROM #MinuteData;

-- Clean up the fixture.
DROP TABLE #MinuteData;
这些值记录的是货币汇率:对于每个1分钟区间(即一根 bar),Open 是该分钟开始时的价格,Close 是该分钟结束时的价格;High 和 Low 分别表示该分钟内的最高和最低汇率。
所需输出
我希望将这些数据重新聚合为5分钟间隔,以产生以下输出:
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
也就是说,Open 取每个5分钟区间内第一分钟的 Open 值,Close 取该区间内最后一分钟的 Close 值;High 和 Low 分别是这5分钟内最高的 High 和最低的 Low。
当前解决方案
我已经有一个可以做到这一点的解决方案(见下文),但它不够优雅,因为它依赖于 Id 值和自联接。此外,我打算在更大的数据集上运行它,所以希望尽可能用更高效的方式来实现:
-- Step 1: tag every 1-minute row with the index of its 5-minute bucket
-- (whole minutes elapsed since 2015-01-01 00:00, integer-divided by 5).
SELECT
    src.Id,
    src.MinuteBar,
    src.[Open],
    src.High,
    src.Low,
    src.[Close],
    DATEDIFF(MINUTE, '2015-01-01T00:00:00', src.MinuteBar) / 5 AS Interval
INTO #5MinuteData
FROM #MinuteData AS src
ORDER BY src.MinuteBar;

-- Step 2: one row per bucket with its extremes.
-- MIN(Id) / MAX(Id) stand in for "first bar" / "last bar" of the bucket, which
-- is only valid when Id increases together with MinuteBar (as in the sample data).
SELECT
    bucketed.Interval,
    MIN(bucketed.MinuteBar) AS MinuteBar,
    MIN(bucketed.Id)        AS OpenId,
    MAX(bucketed.Id)        AS CloseId,
    MIN(bucketed.Low)       AS Low,
    MAX(bucketed.High)      AS High
INTO #DataMinMax
FROM #5MinuteData AS bucketed
GROUP BY bucketed.Interval;

-- Step 3: join back twice to fetch the Open of the first bar and the Close of
-- the last bar of each bucket.
SELECT
    agg.Interval,
    agg.MinuteBar,
    firstBar.[Open],
    lastBar.[Close],
    agg.Low,
    agg.High
FROM #DataMinMax AS agg
INNER JOIN #5MinuteData AS firstBar
    ON firstBar.Id = agg.OpenId
INNER JOIN #5MinuteData AS lastBar
    ON lastBar.Id = agg.CloseId;

-- Remove the intermediate tables.
DROP TABLE #DataMinMax;
DROP TABLE #5MinuteData;
返工尝试
我一直在考虑用 FIRST_VALUE 和 LAST_VALUE 来代替上面的查询,因为它们似乎正是我需要的,但我没能让它们与我所做的分组配合起来。也许还有比我正在尝试的做法更好的方案,所以我愿意接受建议。目前我正在尝试这样做:
-- Attempted single-statement rewrite: group the 1-minute rows into 5-minute
-- buckets and pick each bucket's first Open / last Close with window functions.
-- NOTE: SQL Server rejects this statement as written. The two window functions
-- reference MinuteBar directly in OVER (ORDER BY ...), but in a grouped query
-- MinuteBar is neither part of the GROUP BY expression list nor wrapped in an
-- aggregate. They also have no PARTITION BY, so they are not tied to the bucket.
SELECT MIN(MinuteBar) MinuteBar5 ,
-- Offending line 1: raw MinuteBar used inside OVER().
FIRST_VALUE([Open]) OVER (ORDER BY MinuteBar) AS Opening,
MAX(High) AS High ,
MIN(Low) AS Low ,
-- Offending line 2: raw MinuteBar used inside OVER().
LAST_VALUE([Close]) OVER (ORDER BY MinuteBar) AS Closing ,
-- Bucket index: whole minutes since 2015-01-01 00:00, integer-divided by 5.
DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
FROM #MinuteData
GROUP BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5
这会产生以下错误。该错误与 FIRST_VALUE 和 LAST_VALUE 有关,因为只要删除这两行,查询就能正常运行:
列'#MinuteData.MinuteBar'在选择列表中无效,因为它不包含在聚合函数或GROUP BY子句中。
答案 0 :(得分:13)
-- Aggregate 1-minute bars into 5-minute bars in a single statement.
;WITH Bucketed AS
(
    -- Tag each row with its 5-minute bucket index
    -- (whole minutes since 2015-01-01 00:00, integer-divided by 5).
    SELECT
        MinuteBar,
        [Open],
        High,
        Low,
        [Close],
        DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
    FROM #MinuteData
),
Edges AS
(
    -- Stamp every row with its bucket's opening and closing price.
    -- Closing uses FIRST_VALUE over a descending sort, so no explicit
    -- window frame is needed to reach the last bar of the bucket.
    SELECT
        MinuteBar,
        High,
        Low,
        Interval,
        FIRST_VALUE([Open])  OVER (PARTITION BY Interval ORDER BY MinuteBar)      AS Opening,
        FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY MinuteBar DESC) AS Closing
    FROM Bucketed
)
-- Opening and Closing are constant within a bucket, so grouping by them
-- alongside Interval still yields exactly one row per bucket.
SELECT
    MIN(MinuteBar) AS MinuteBar5,
    Opening,
    MAX(High)      AS High,
    MIN(Low)       AS Low,
    Closing,
    Interval
FROM Edges
GROUP BY
    Interval,
    Opening,
    Closing
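这是一个与你当前的尝试比较接近的解决方案。你的查询有两处问题:
1. FIRST_VALUE 和 LAST_VALUE 是窗口函数,作用于窗口/分区(PARTITION BY),而不是 GROUP BY 的分组;因此需要先在子查询中按区间分区计算它们,再在外层查询中分组。
2. LAST_VALUE 返回的是当前窗口的最后一个值。你的查询没有指定窗口,而默认窗口是从当前分区的第一行到当前行,所以得到的并不是分区的最后一行。你可以改用按降序(descending)排序的 FIRST_VALUE,也可以显式指定窗口: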
-- Select-list fragment: alternative definition of the Closing column.
-- Keeps the ascending sort and widens the frame to the whole partition so that
-- LAST_VALUE returns the Close of the final bar in each 5-minute bucket.
LAST_VALUE([Close]) OVER (PARTITION BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5
ORDER BY MinuteBar
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Closing,
答案 1 :(得分:2)
这是一种没有临时表的方法:
-- Aggregate the 1-minute bars in #MinuteData into 5-minute bars without
-- temporary tables. Returns one row per 5-minute bucket:
--   MinuteBar = earliest bar present in the bucket,
--   Open      = Open of that earliest bar,
--   Close     = Close of the latest bar in the bucket,
--   Low/High  = extremes across the bucket.
;WITH CTEInterval AS
(   -- This replaces your first temporary table (#5MinuteData)
    SELECT  [Id],
            [MinuteBar],
            [Open],
            [High],
            [Low],
            [Close],
            -- Bucket index = whole minutes since a fixed anchor, integer-divided by 5.
            -- DATEPART(MINUTE, ...) must not be used here: it only yields the
            -- minute-of-hour, so 17:00 and 18:00 (or the same time on another
            -- day) would fall into the same bucket. The anchor is a midnight, so
            -- buckets still start on :00, :05, :10, ... wall-clock boundaries.
            -- Assumes no rows earlier than the anchor (integer division
            -- truncates toward zero for negative differences).
            DATEDIFF(MINUTE, '1900-01-01T00:00:00', [MinuteBar]) / 5 AS Interval
    FROM #MinuteData
), CTEOpenClose AS
(   -- This is instead of your second temporary table (#DataMinMax):
    -- stamp every row with the opening and closing price of its bucket.
    SELECT  [Id],
            [MinuteBar],
            FIRST_VALUE([Open])  OVER (PARTITION BY Interval ORDER BY [MinuteBar])      AS [Open],
            [High],
            [Low],
            -- Descending sort + FIRST_VALUE picks the last bar without
            -- having to spell out a window frame.
            FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY [MinuteBar] DESC) AS [Close],
            Interval
    FROM CTEInterval
)
-- This is the final select
SELECT  MIN([MinuteBar]) AS [MinuteBar],
        -- [Open] and [Close] are constant within a bucket, so any aggregate
        -- returns that value; MIN does so without arithmetic and keeps the
        -- source column's data type.
        MIN([Open])      AS [Open],
        MIN([Close])     AS [Close],
        MIN([Low])       AS [Low],
        MAX([High])      AS [High]
FROM CTEOpenClose
GROUP BY Interval
ORDER BY MIN([MinuteBar])   -- deterministic, chronological output
结果:
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
答案 2 :(得分:2)
-- Aggregate the 1-minute bars in #MinuteData into 5-minute bars by joining them
-- to a generated calendar of 5-minute intervals.
-- Returns one row per interval that contains data: minutebar (earliest bar in
-- the interval), low, High, open (first bar's Open), close (last bar's Close).
;WITH cte AS
(   -- Calendar of 5-minute intervals; this can be a permanent table instead of
    -- being generated on the fly. The range is hard-coded (17:00 through the
    -- interval starting 17:50). A longer range may exceed the default recursion
    -- limit of 100 levels and would then need OPTION (MAXRECURSION n).
    SELECT  CAST('2015-01-01T17:00:00.000' AS DATETIME)                         AS interval,
            DATEADD(MINUTE, 5, CAST('2015-01-01T17:00:00.000' AS DATETIME))     AS nxtinterval
    UNION ALL
    SELECT  DATEADD(MINUTE, 5, interval),
            DATEADD(MINUTE, 5, nxtinterval)
    FROM cte
    WHERE interval <= '2015-01-01T17:45:00.000'
),
finalcte AS
(
    SELECT  m.[MinuteBar] AS minutebar,
            m.[Low]       AS low,
            m.[High]      AS high,
            c.interval    AS grpd,   -- interval start identifies the bucket
            -- Order by the bar's own timestamp: ordering by the partition
            -- columns makes every row a tie, so the "first"/"last" row picked
            -- would be arbitrary.
            FIRST_VALUE(m.[Open])  OVER (PARTITION BY c.interval ORDER BY m.[MinuteBar])      AS [open],
            FIRST_VALUE(m.[Close]) OVER (PARTITION BY c.interval ORDER BY m.[MinuteBar] DESC) AS [close]
    FROM cte AS c
    INNER JOIN #MinuteData AS m
        -- Half-open range [interval, nxtinterval): with BETWEEN (closed on
        -- both ends) a bar stamped exactly on a boundary, e.g. 17:05, would be
        -- counted in both the 17:00 and the 17:05 bucket.
        ON  m.[MinuteBar] >= c.interval
        AND m.[MinuteBar] <  c.nxtinterval
)
SELECT  MIN(minutebar) AS minutebar,
        MIN(low)       AS [low],
        MAX(high)      AS [High],
        -- [open] / [close] are constant within a bucket; MAX just collapses them.
        MAX([open])    AS [open],
        MAX([close])   AS [close]
FROM finalcte
GROUP BY grpd
ORDER BY grpd   -- deterministic, chronological output