我一直在优化一个将价格数据压缩为 O-H-L-C(开盘价-最高价-最低价-收盘价)区间的查询。我希望只用单个查询来完成,而不必使用带行分区的多个查询来确定开盘价和收盘价。
在我昨天发布的一个问题所得到的回复的帮助下,我目前写出了如下查询:
-- Compresses the raw ticks of one instrument into O-H-L-C bars that are
-- @Interval minutes wide.
--
-- OPEN and CLOSE are found in a single pass: every tick is turned into a
-- sortable string key '<timestamp>_<price>', MIN/MAX of that key selects the
-- earliest/latest tick of the bar, and the price is then cut back out of it.
-- Note that [OPEN] and [CLOSE] are therefore returned as character data.
DECLARE @Interval     INT      = 5;             -- bar width in minutes
DECLARE @InstrumentId INT      = 36;
DECLARE @Start_Date   DATETIME = '2015-01-01';
DECLARE @End_Date     DATETIME = '2015-03-30';
DECLARE @OffsetTime   DATETIME = 0;             -- origin the bars are aligned to (0 converts to 1900-01-01 00:00)

SELECT
    t.INSTRUMENT_ID,
    k.INTERVAL_TIME,
    -- CONVERT style 21 renders 'yyyy-mm-dd hh:mi:ss.mmm' (23 characters), so
    -- with the '_' separator the price starts at character 25. The length is
    -- 10 to match the VARCHAR(10) the price is cast to; a shorter length
    -- would silently truncate prices longer than that.
    SUBSTRING(MIN(k.TICK_KEY), 25, 10) AS [OPEN],
    MAX(t.RATE_BID)                    AS HIGH,
    MIN(t.RATE_BID)                    AS LOW,
    SUBSTRING(MAX(k.TICK_KEY), 25, 10) AS [CLOSE]
FROM dbo.TICKS AS t
CROSS APPLY (
    -- Computed once per row so that SELECT and GROUP BY share one definition.
    SELECT
        -- Integer division floors the minute count to a multiple of @Interval.
        DATEADD(minute,
                (DATEDIFF(minute, @OffsetTime, t.[TIME_STAMP]) / @Interval) * @Interval,
                @OffsetTime) AS INTERVAL_TIME,
        CONVERT(VARCHAR(24), t.[TIME_STAMP], 21) + '_' +
            CAST(t.RATE_BID AS VARCHAR(10)) AS TICK_KEY
) AS k
WHERE t.INSTRUMENT_ID = @InstrumentId
  AND t.TIME_STAMP BETWEEN @Start_Date AND @End_Date   -- both bounds are inclusive
GROUP BY k.INTERVAL_TIME,
         t.INSTRUMENT_ID
ORDER BY k.INTERVAL_TIME;
是否有更高效的方法把日期/时间与价格拼接在一起,然后在计算 MIN 和 MAX 之后只提取出价格?我在考虑使用日期/时间的二进制表示,把价格附加在其后,再通过一些操作把价格提取出来,但我不太清楚该从哪里入手。
答案 0 :(得分:1)
要测试查询的性能,使用DDL和示例数据会很有用。我假设以下表结构:
-- Assumed shape of the tick table: one bid price per instrument and timestamp.
-- The table is created in the dbo schema explicitly because the queries that
-- use it reference it as dbo.TICKS.
CREATE TABLE dbo.TICKS (
    INSTRUMENT_ID INT      NOT NULL,
    TIME_STAMP    DATETIME NOT NULL,
    RATE_BID      INT      NOT NULL,
    -- Named so that the constraint can be referenced in errors and migrations.
    CONSTRAINT PK_TICKS PRIMARY KEY (INSTRUMENT_ID, TIME_STAMP)
);
为了生成一些样本数据,我使用了以下代码:
-- Inline table-valued function that returns the integers 1 through @N in a
-- single column named Number.
--
-- No base table is read: a two-row seed is cross-joined with itself five
-- times, which squares the row count at every level, and the resulting rows
-- are numbered with ROW_NUMBER(). The final filter keeps the first @N numbers.
CREATE FUNCTION dbo.Numbers(@N int)
RETURNS TABLE
AS
RETURN
    WITH
    Seed  AS (SELECT 1 AS C FROM (VALUES (1), (1)) AS pair(C)),          -- 2 rows
    Pow2  AS (SELECT 1 AS C FROM Seed  AS lhs CROSS JOIN Seed  AS rhs),  -- 4 rows
    Pow4  AS (SELECT 1 AS C FROM Pow2  AS lhs CROSS JOIN Pow2  AS rhs),  -- 16 rows
    Pow8  AS (SELECT 1 AS C FROM Pow4  AS lhs CROSS JOIN Pow4  AS rhs),  -- 256 rows
    Pow16 AS (SELECT 1 AS C FROM Pow8  AS lhs CROSS JOIN Pow8  AS rhs),  -- 65,536 rows
    Pow32 AS (SELECT 1 AS C FROM Pow16 AS lhs CROSS JOIN Pow16 AS rhs),  -- 4,294,967,296 rows
    -- ORDER BY (SELECT NULL) satisfies the syntax without requesting a sort.
    Sequenced AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS Number
        FROM Pow32
    )
    SELECT s.Number
    FROM Sequenced AS s
    WHERE s.Number <= @N
GO
-- Fills dbo.TICKS with synthetic bid prices for instruments 1 through 49.
--
-- For every instrument a candidate timestamp is produced every 10 seconds
-- starting from 2015-01-01. Only timestamps after 2015-01-03, on Monday to
-- Friday and with an hour between 8 and 15 are kept. The price is a random
-- walk, so the rows are generated one at a time with a cursor: each tick moves
-- the price by a random step of 0..9 in the current direction, and the
-- direction flips at random.
--
-- To start from an empty table, run this first:
--DELETE dbo.TICKS
SET NOCOUNT ON;

DECLARE @INSTRUMENT_ID INT = 1;
DECLARE @TIME_STAMP DATETIME, @Delta INT, @ChangeDirection BIT;
DECLARE @RATE_BID INT, @Direction SMALLINT;

WHILE @INSTRUMENT_ID < 50
BEGIN
    DECLARE RandomData CURSOR LOCAL READ_ONLY FOR
        SELECT
            x.TIME_STAMP,
            -- NEWID() is evaluated per row, so every row gets its own RAND seed.
            CONVERT(INT, RAND(CHECKSUM(NEWID())) * 10) AS Delta,
            -- Truncating RAND * 1.1 gives 1 only when RAND >= 1/1.1,
            -- i.e. the direction flips on roughly 9% of the ticks.
            CONVERT(BIT, CONVERT(INT, RAND(CHECKSUM(NEWID())) * 1.1)) AS ChangeDirection
        FROM (
            -- Alternative: one candidate tick per minute.
            --SELECT DATEADD(MINUTE,Number,'20150101') AS TIME_STAMP FROM dbo.Numbers(150000)
            SELECT DATEADD(SECOND, Number * 10, '20150101') AS TIME_STAMP
            FROM dbo.Numbers(900000)
        ) AS x
        WHERE DATEPART(HOUR, x.TIME_STAMP) BETWEEN 8 AND 15
          -- Adding @@DATEFIRST makes the test independent of the session
          -- setting: Saturday maps to 0 and Sunday to 1, both are excluded.
          AND (DATEPART(WEEKDAY, x.TIME_STAMP) + @@DATEFIRST) % 7 > 1
          AND x.TIME_STAMP > '20150103'
        -- The walk must advance chronologically; without ORDER BY the fetch
        -- order is not guaranteed.
        ORDER BY x.TIME_STAMP;

    OPEN RandomData;

    -- Random starting price in the range 100..10099, initially moving up.
    SET @RATE_BID = CONVERT(INT, RAND(CHECKSUM(NEWID())) * 10000) + 100;
    SET @Direction = 1;

    WHILE 1 = 1
    BEGIN
        FETCH NEXT FROM RandomData INTO @TIME_STAMP, @Delta, @ChangeDirection;
        IF @@FETCH_STATUS <> 0 BREAK;

        SET @Direction = CASE WHEN @ChangeDirection = 1 THEN -@Direction ELSE @Direction END;
        -- Once the price has dropped below 100, force the walk upwards again.
        IF @RATE_BID < 100 AND @Direction < 0 SET @Direction = 1;
        SET @RATE_BID = @RATE_BID + @Delta * @Direction;

        -- Explicit column list: the insert must not depend on column order.
        INSERT INTO dbo.TICKS (INSTRUMENT_ID, TIME_STAMP, RATE_BID)
        VALUES (@INSTRUMENT_ID, @TIME_STAMP, @RATE_BID);
    END;

    CLOSE RandomData;
    DEALLOCATE RandomData;

    SET @INSTRUMENT_ID = @INSTRUMENT_ID + 1;
END;

SET NOCOUNT OFF;
然后,我将您的原始查询与一个使用二进制数据类型(而非字符串)的变体进行了对比测试:
-- Benchmark: runs the string-key OHLC query and a binary-key variant of it
-- back to back over the same parameters and prints the elapsed time of each
-- in seconds.
DECLARE @Interval INT = 5
DECLARE @InstrumentId INT = 36
DECLARE @Start_Date DATETIME = '2015-01-01'
DECLARE @End_Date DATETIME = '2015-03-30'
-- Origin the bars are aligned to; 0 converts to 1900-01-01 00:00.
DECLARE @OffsetTime DATETIME = 0
-- Wall-clock start of the query currently being timed.
DECLARE @StartTime DATETIME
SET @StartTime=GETDATE()
-- Variant 1: the open/close tick is selected through a string key
-- '<timestamp>_<price>'; [OPEN] and [CLOSE] come back as character data.
SELECT INSTRUMENT_ID,
-- Integer division floors the minute count to a multiple of @Interval.
DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime) INTERVAL_TIME,
-- Style 21 renders 'yyyy-mm-dd hh:mi:ss.mmm' (23 characters), so with the '_'
-- separator the price starts at character 25.
-- NOTE(review): only 8 characters are extracted although the price is cast to
-- VARCHAR(10); prices longer than 8 characters would be truncated.
SUBSTRING(MIN(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' +
CAST(RATE_BID AS VARCHAR(10))),25,8) [OPEN],
MAX(RATE_BID) HIGH,
MIN(RATE_BID) LOW,
SUBSTRING(MAX(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' +
CAST(RATE_BID AS VARCHAR(10))),25,8) [CLOSE]
FROM dbo.TICKS
-- BETWEEN includes both @Start_Date and @End_Date.
WHERE INSTRUMENT_ID = @InstrumentId AND TIME_STAMP BETWEEN @Start_Date AND @End_Date
GROUP BY DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime),
INSTRUMENT_ID
ORDER BY INTERVAL_TIME
-- Elapsed milliseconds divided by 1000. and shown as seconds with 3 decimals.
PRINT CONVERT(NUMERIC(10,3),DATEDIFF(MS,@StartTime,GETDATE())/1000.)
-- Restart the clock for the second variant.
SET @StartTime=GETDATE()
-- Variant 2: same bars, but the key is binary: the timestamp as BINARY(8)
-- followed by the price as BINARY(4). Bytes 9 to 12 of the MIN/MAX key are
-- converted back to INT, so [OPEN] and [CLOSE] are integers here.
-- NOTE(review): relies on the byte-wise order of a DATETIME converted to
-- BINARY(8) matching chronological order; presumably true for dates on or
-- after 1900-01-01 - confirm before using with older data.
SELECT INSTRUMENT_ID,
DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime) INTERVAL_TIME,
CONVERT(INT,SUBSTRING(MIN(CONVERT(BINARY(8),[TIME_STAMP]) +
CAST(RATE_BID AS BINARY(4))),9,4)) [OPEN],
MAX(RATE_BID) HIGH,
MIN(RATE_BID) LOW,
CONVERT(INT,SUBSTRING(MAX(CONVERT(BINARY(8),[TIME_STAMP]) +
CAST(RATE_BID AS BINARY(4))),9,4)) [CLOSE]
FROM dbo.TICKS
WHERE INSTRUMENT_ID = @InstrumentId AND TIME_STAMP BETWEEN @Start_Date AND @End_Date
GROUP BY DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime),
INSTRUMENT_ID
ORDER BY INTERVAL_TIME
-- Elapsed time of the binary variant, in seconds.
PRINT CONVERT(NUMERIC(10,3),DATEDIFF(MS,@StartTime,GETDATE())/1000.)
在我的系统上,字符串版本的执行时间为 490 ms,二进制版本的执行时间为 293 ms。