优化查询以将价格数据压缩为O-H-L-C区间

时间:2015-04-17 14:42:06

标签: sql-server-2008 tsql

我一直在优化一个将价格数据压缩为 O-H-L-C(开盘、最高、最低、收盘)区间的查询。我希望用单个查询完成此操作,而不必使用多个带行分区的查询来确定开盘价和收盘价。

借助我昨天发布的另一个问题所得到的回复,我目前写出了如下查询:

 -- Collapse the ticks of one instrument into O-H-L-C bars using a single GROUP BY.
 -- @Interval is the bar width in minutes.
 DECLARE @Interval INT = 5
 DECLARE @InstrumentId INT = 36
 DECLARE @Start_Date DATETIME = '2015-01-01'
 DECLARE @End_Date DATETIME = '2015-03-30'
 -- Anchor from which bars are counted; the integer 0 converts to 1900-01-01 00:00:00.
 DECLARE @OffsetTime DATETIME = 0

   SELECT INSTRUMENT_ID, 
         -- Start of the bar: whole minutes since @OffsetTime, rounded down by integer
         -- division to a multiple of @Interval, then added back onto @OffsetTime.
         DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime) INTERVAL_TIME,
         -- OPEN: MIN over 'timestamp_price' strings selects the entry with the earliest
         -- timestamp in the bar. Style 21 is 'yyyy-mm-dd hh:mi:ss.mmm' (23 characters),
         -- '_' is character 24, so the price text starts at position 25. SUBSTRING keeps
         -- at most 8 characters of it, and the result is a character string.
         -- NOTE(review): CAST allows 10 characters but only 8 are extracted - confirm
         -- that prices never need more than 8 characters.
         SUBSTRING(MIN(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' + 
                       CAST(RATE_BID AS VARCHAR(10))),25,8)                                          [OPEN],
         MAX(RATE_BID)                                                                                HIGH, 
         MIN(RATE_BID)                                                                                LOW,  
         -- CLOSE: same technique with MAX, which selects the latest timestamp in the bar.
         SUBSTRING(MAX(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' +
                       CAST(RATE_BID AS VARCHAR(10))),25,8)                                          [CLOSE]
    FROM dbo.TICKS 
   -- BETWEEN is inclusive at both ends, so a tick stamped exactly @End_Date is included.
   WHERE INSTRUMENT_ID = @InstrumentId AND TIME_STAMP BETWEEN @Start_Date AND @End_Date
-- The bar-start expression is repeated here because GROUP BY cannot reference the
-- INTERVAL_TIME alias defined in the SELECT list.
GROUP BY DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime),    
         INSTRUMENT_ID
ORDER BY INTERVAL_TIME

是否有更高效的方法把日期/时间与价格拼接起来,并在求出 MIN 和 MAX 之后只提取价格?我在考虑使用日期/时间的二进制表示,将其与价格组合在一起,然后通过某些运算把价格提取出来,但我不太清楚该从哪里入手。

1 个答案:

答案 0 :(得分:1)

要测试查询的性能,使用DDL和示例数据会很有用。我假设以下表结构:

-- Assumed tick table: one bid rate per instrument per timestamp.
-- Schema-qualified so it is the same object the queries reference as dbo.TICKS.
-- Column order (INSTRUMENT_ID, TIME_STAMP, RATE_BID) is kept so positional
-- inserts against this table continue to work.
CREATE TABLE dbo.TICKS (
    INSTRUMENT_ID INT      NOT NULL,
    TIME_STAMP    DATETIME NOT NULL,
    RATE_BID      INT      NOT NULL,
    -- Clustered on (instrument, time): the OHLC queries filter on one instrument
    -- and a time range, which this key serves directly.
    CONSTRAINT PK_TICKS PRIMARY KEY CLUSTERED (INSTRUMENT_ID, TIME_STAMP)
)

为了生成一些样本数据,我使用了以下代码:

-- Inline table-valued function returning the integers 1 through @N in a single
-- column named Number. Rows are produced by repeatedly cross-joining a two-row
-- set with itself and numbering the result; no base table is read.
CREATE FUNCTION dbo.Numbers(@N int)
RETURNS TABLE
AS
RETURN
    WITH
    Rows2 AS (
        SELECT 1 AS C
        UNION ALL
        SELECT 1
    ),                                                                   -- 2 rows
    Rows4   AS (SELECT 1 AS C FROM Rows2   AS a CROSS JOIN Rows2   AS b), -- 4 rows
    Rows16  AS (SELECT 1 AS C FROM Rows4   AS a CROSS JOIN Rows4   AS b), -- 16 rows
    Rows256 AS (SELECT 1 AS C FROM Rows16  AS a CROSS JOIN Rows16  AS b), -- 256 rows
    Rows64K AS (SELECT 1 AS C FROM Rows256 AS a CROSS JOIN Rows256 AS b), -- 65,536 rows
    Rows4G  AS (SELECT 1 AS C FROM Rows64K AS a CROSS JOIN Rows64K AS b), -- 4,294,967,296 rows
    Numbered AS (
        -- ORDER BY (SELECT NULL) satisfies the mandatory OVER(ORDER BY ...) clause
        -- without requesting any particular ordering of the constant rows.
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS Number
        FROM Rows4G
    )
    SELECT Number
    FROM Numbered
    WHERE Number <= @N

GO
-- Populate dbo.TICKS with sample data: for each instrument 1..49, a random-walk
-- bid rate sampled every 10 seconds during hours 08-15 on weekdays.
-- Uncomment the next line to clear previously generated rows before re-running.
--DELETE dbo.TICKS

SET NOCOUNT ON

-- Declared once up front; T-SQL variables are batch-scoped, so declaring them
-- inside the loop would not create fresh variables per iteration anyway.
DECLARE @INSTRUMENT_ID INT
DECLARE @TIME_STAMP DATETIME, @Delta INT, @ChangeDirection BIT
DECLARE @RATE_BID INT, @Direction SMALLINT

SET @INSTRUMENT_ID=1

WHILE @INSTRUMENT_ID<50 BEGIN
    -- One row per sample instant with two per-row random values:
    --   Delta           : integer step size 0..9
    --   ChangeDirection : 1 on roughly one row in eleven (RAND()*1.1 truncates to 1
    --                     only when RAND() >= 1/1.1), otherwise 0
    DECLARE RandomData CURSOR LOCAL READ_ONLY FOR
    SELECT TIME_STAMP, CONVERT(INT,RAND(CHECKSUM(NEWID()))*10) AS Delta,
        CONVERT(BIT,CONVERT(INT,RAND(CHECKSUM(NEWID()))*1.1)) AS ChangeDirection
    FROM (
        -- Alternative density, one sample per minute:
        --SELECT DATEADD(MINUTE,Number,'20150101') AS TIME_STAMP FROM dbo.Numbers(150000)
        SELECT DATEADD(SECOND,Number*10,'20150101') AS TIME_STAMP FROM dbo.Numbers(900000)
    ) x
    WHERE DATEPART(HOUR,TIME_STAMP) BETWEEN 8 AND 15
    -- Adding @@DATEFIRST makes the weekday number independent of the session
    -- setting: Saturday maps to 0 and Sunday to 1, so ">1" keeps Monday-Friday.
    AND (DATEPART(WEEKDAY,TIME_STAMP)+@@DATEFIRST)%7>1
    AND TIME_STAMP>'20150103'
    -- The walk below carries state from row to row, so the rows must arrive in
    -- chronological order; without ORDER BY that order is not guaranteed.
    ORDER BY TIME_STAMP

    OPEN RandomData

    -- Each instrument starts from its own random level (100..10099), heading upward.
    SET @RATE_BID=CONVERT(INT,RAND(CHECKSUM(NEWID()))*10000)+100
    SET @Direction=1

    WHILE 1=1 BEGIN
        FETCH NEXT FROM RandomData INTO @TIME_STAMP, @Delta, @ChangeDirection
        IF @@FETCH_STATUS<>0 BREAK

        -- Flip the direction when the row says so.
        SET @Direction=CASE WHEN @ChangeDirection=1 THEN -@Direction ELSE @Direction END
        -- Soft floor: once the rate is below 100, force the walk upward again.
        IF @RATE_BID<100 AND @Direction<0 SET @Direction=1

        SET @RATE_BID=@RATE_BID+@Delta*@Direction

        -- Explicit column list so the insert does not depend on the table's
        -- physical column order.
        INSERT INTO dbo.TICKS (INSTRUMENT_ID, TIME_STAMP, RATE_BID)
        VALUES (@INSTRUMENT_ID, @TIME_STAMP, @RATE_BID)
    END

    CLOSE RandomData
    DEALLOCATE RandomData

    SET @INSTRUMENT_ID=@INSTRUMENT_ID+1
END
SET NOCOUNT OFF

然后我将您的原始查询与一个使用二进制数据类型(而非字符串)的变体进行了对比测试:

-- Benchmark: run the string-based OHLC query and a binary-based variant over the
-- same instrument and date range, printing the elapsed seconds of each.
DECLARE @Interval INT = 5
DECLARE @InstrumentId INT = 36
DECLARE @Start_Date DATETIME = '2015-01-01'
DECLARE @End_Date DATETIME = '2015-03-30'
-- The integer 0 converts to 1900-01-01 00:00:00; bars are counted from this anchor.
DECLARE @OffsetTime DATETIME = 0

-- Wall-clock start of the first measurement.
DECLARE @StartTime DATETIME
SET @StartTime=GETDATE()

  -- Variant 1 (string): OPEN/CLOSE come from MIN/MAX over 'timestamp_price'
  -- strings; the price text starts at character 25 and at most 8 characters of
  -- it are returned, as a character string.
  SELECT INSTRUMENT_ID, 
         DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime) INTERVAL_TIME,
         SUBSTRING(MIN(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' + 
                       CAST(RATE_BID AS VARCHAR(10))),25,8)                                          [OPEN],
         MAX(RATE_BID)                                                                                HIGH, 
         MIN(RATE_BID)                                                                                LOW,  
         SUBSTRING(MAX(CONVERT(VARCHAR(24),[TIME_STAMP],21) + '_' +
                       CAST(RATE_BID AS VARCHAR(10))),25,8)                                          [CLOSE]
    FROM dbo.TICKS 
   -- BETWEEN is inclusive at both ends.
   WHERE INSTRUMENT_ID = @InstrumentId AND TIME_STAMP BETWEEN @Start_Date AND @End_Date
GROUP BY DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime),    
         INSTRUMENT_ID
ORDER BY INTERVAL_TIME

-- Elapsed time of variant 1 in seconds (3 decimals), then restart the clock.
PRINT CONVERT(NUMERIC(10,3),DATEDIFF(MS,@StartTime,GETDATE())/1000.)
SET @StartTime=GETDATE()

  -- Variant 2 (binary): the key is the 8-byte binary form of TIME_STAMP followed
  -- by the 4-byte binary form of RATE_BID (12 bytes in total). MIN/MAX pick the
  -- smallest/largest key, SUBSTRING(...,9,4) extracts bytes 9-12 (the rate) and
  -- CONVERT turns them back into an INT.
  -- NOTE(review): this relies on the byte order of the DATETIME bytes matching
  -- chronological order for the dates in the table - confirm before using it
  -- with dates earlier than 1900-01-01.
  SELECT INSTRUMENT_ID, 
         DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime) INTERVAL_TIME,
         CONVERT(INT,SUBSTRING(MIN(CONVERT(BINARY(8),[TIME_STAMP]) + 
                       CAST(RATE_BID AS BINARY(4))),9,4))                                          [OPEN],
         MAX(RATE_BID)                                                                                HIGH, 
         MIN(RATE_BID)                                                                                LOW,  
         CONVERT(INT,SUBSTRING(MAX(CONVERT(BINARY(8),[TIME_STAMP]) + 
                       CAST(RATE_BID AS BINARY(4))),9,4))                                          [CLOSE]
    FROM dbo.TICKS 
   WHERE INSTRUMENT_ID = @InstrumentId AND TIME_STAMP BETWEEN @Start_Date AND @End_Date
GROUP BY DATEADD(minute,(DATEDIFF(minute,@OffsetTime,[TIME_STAMP])/@Interval)*@Interval,@OffsetTime),    
         INSTRUMENT_ID
ORDER BY INTERVAL_TIME

-- Elapsed time of variant 2 in seconds (3 decimals).
PRINT CONVERT(NUMERIC(10,3),DATEDIFF(MS,@StartTime,GETDATE())/1000.)

在我的系统上,字符串版本的执行时间为 490 毫秒,二进制版本为 293 毫秒。