Question

我需要在SQL Server中执行此Oracle查询：

-- Sustained min/max of dms_value per (case_id, channel_index).
-- Inner query, for every sample row, looks back over the 3 minutes that end at
-- that row's start_time:
--   su_min = MAX over the window (the lowest of these is the sustained minimum)
--   su_max = MIN over the window (the highest of these is the sustained maximum)
-- first_time is the earliest start_time of the partition.
select case_id, channel_index,
     min(su_min) as sustained_min,
     max(su_max) as sustained_max
from (
    select case_id, channel_index, start_time,
        min(dms_value) over (partition by case_id, channel_index order by start_time 
             range numtodsinterval(3, 'minute') preceding) as su_max,
        max(dms_value) over (partition by case_id, channel_index order by start_time 
             range numtodsinterval(3, 'minute') preceding) as su_min, 
        min(start_time) over (partition by case_id, channel_index order by start_time)
             as first_time
    from  data_table order by start_time 
    ) su_data  -- Oracle does not accept AS in front of a table alias
where  
    -- ignore windows that end less than 3 minutes after the first sample
    first_time + numtodsinterval(3, 'minute') <= start_time
group by
    case_id, channel_index

以下是我尝试用基本T-SQL实现的版本，但是当一个案例有100万条以上的记录时，它运行了37分钟仍未完成（之后我取消了查询）：

-- Returns one row per channel of the given case with its sustained max/min:
--   max = the highest per-window MIN(dms_value)
--   min = the lowest  per-window MAX(dms_value)
-- A window is evaluated for every sample row and covers
-- [start_time - (@time_limit - 1) seconds, start_time]; it only counts when the
-- value_duration of the rows inside it adds up to at least @time_limit.
--
-- Parameters:
--   @case_id      case whose rows in continuous_data are processed
--   @time_limit   window length (passed to DATEADD as seconds)
--   @bypass_only  accepted for interface compatibility; not read by this body
ALTER procedure [dbo].[GetSustainedValues]( 
  @case_id int,
  @time_limit int, 
  @bypass_only bit = NULL)
as 
begin

-- Avoid sending a "rows affected" message for every statement in the loop.
-- @@ROWCOUNT is still maintained while NOCOUNT is ON.
SET NOCOUNT ON

DECLARE @time DateTime, @channelindex int, @lastchannelindex int
DECLARE @tmin float, @tmax float, @min float, @max float

DECLARE @results TABLE(case_id int, channel_index int, max float null, min float null)

-- LOCAL keeps the cursor name private to this procedure call.
DECLARE CursorName CURSOR LOCAL FAST_FORWARD
    FOR SELECT start_time, channel_index from continuous_data where case_id = @case_id order by channel_index, start_time
OPEN CursorName
FETCH NEXT FROM CursorName INTO @time, @channelindex
SET @lastchannelindex = @channelindex
WHILE @@FETCH_STATUS = 0
BEGIN
    IF @lastchannelindex != @channelindex
    BEGIN
        -- we are starting on a new channel so insert that data into the results
        -- table and reset the min/max
        INSERT INTO @results(case_id, channel_index, max, min) VALUES(@case_id, @lastchannelindex, @max, @min)
        SET @max = null
        SET @min = null
        SET @lastchannelindex = @channelindex
    END

    -- Extremes of the window ending at the current row. When HAVING rejects the
    -- window no row is produced, the variables keep their old values and
    -- @@ROWCOUNT is 0, so the block below is skipped.
    Select @tmax = MAX(dms_value), @tmin = MIN(dms_value)
    from continuous_data
    where case_id = @case_id and channel_index = @channelindex and start_time between DATEADD(s, -(@time_limit-1), @time) and @time 
    HAVING SUM(value_duration) >= @time_limit
    IF @@ROWCOUNT > 0
    BEGIN
        -- sustained max: the largest window minimum seen so far
        IF @max IS null OR @tmin > @max
        BEGIN
            set @max = @tmin
        END

        -- sustained min: the smallest window maximum seen so far
        IF @min IS null OR @tmax < @min
        BEGIN
            set @min = @tmax
        END
    END
    FETCH NEXT FROM CursorName INTO @time, @channelindex
END
CLOSE CursorName
DEALLOCATE CursorName

-- The loop only writes a channel when the NEXT channel starts, so the final
-- channel has to be flushed here. @lastchannelindex stays NULL when the case
-- has no rows at all, in which case nothing is written.
IF @lastchannelindex IS NOT NULL
BEGIN
    INSERT INTO @results(case_id, channel_index, max, min) VALUES(@case_id, @lastchannelindex, @max, @min)
END

SELECT case_id, channel_index, max, min FROM @results
end

这是使用CLR存储过程的好地方吗？还有其他任何想法可以使这个查询更有效吗？

编辑3-9-2012：不必纠结于“first_time”字段。它的作用只是确保3分钟窗口从数据集开始3分钟之后才开始计算。在我的查询中，我不关心first_time。我需要的是每个通道在所有3分钟周期上的持续最小/最大值。

以下是一些包含2个通道的示例数据。请注意，每个样本的持续时间并不总是相同：

-- Sample table for the sustained min/max examples.
-- NOTE(review): value_duration looks like a length in seconds (it is summed and
-- compared with a seconds-based limit in the procedure above) - confirm.
CREATE TABLE #continuous_data
(
        case_id         int
    ,   channel_index   int
    ,   start_time      datetime
    ,   dms_value       float
    ,   value_duration  smallint
)

-- Sample rows for case 2081, channels 50 and 51. Column lists are explicit so
-- the inserts do not depend on the physical column order of the table.
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:34.000', 90, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:39.000', 94.8125, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:40.000', 95.4375, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:40.000', 96, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:41.000', 96.75, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:42.000', 98.0625, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:44.000', 99.3125, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:45.000', 100.625, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:46.000', 101.9375, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:46.000', 98, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:48.000', 103.25, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:49.000', 104.5625, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:50.000', 105.8125, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:52.000', 107.125, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:52.000', 92, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:53.000', 108.4375, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:54.000', 109.75, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:55.000', 111.0625, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:57.000', 112.3125, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:58.000', 113.625, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:58.000', 86, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:59.000', 114.9375, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:01.000', 116.25, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:02.000', 117.5, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:03.000', 118.8125, 2)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:37:04.000', 80, 6)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:05.000', 120.125, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:06.000', 121.4375, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:07.000', 122.75, 1)
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:08.000', 124.0625, 1)

Answer 1

如果您要执行以下操作会怎么样？

-- For every row of data_table, report the MIN/MAX of dms_value over the
-- 3 minutes that end at that row, skipping rows that lie less than 3 minutes
-- after the first sample of their (case_id, channel_index).
SELECT dt2.case_id,
       dt2.channel_index,
       dtf.first_time,
       su_qry.su_min,
       su_qry.su_max
  FROM (-- dtr is the window anchor (one window per row); dt supplies the
        -- samples that fall inside that window. Grouping is by the anchor so
        -- each window yields exactly one min/max pair.
        SELECT   dtr.case_id,
                 dtr.channel_index,
                 dtr.start_time,
                 MIN (dt.dms_value) AS su_min,
                 MAX (dt.dms_value) AS su_max
            FROM data_table dt
                 INNER JOIN
                 (SELECT case_id, channel_index, start_time,
                         -- T-SQL signature is DATEADD(datepart, number, date)
                         DATEADD (mi, -3, start_time) AS start_time_minus_3
                    FROM data_table) dtr
                 ON (    dt.case_id = dtr.case_id
                     AND dt.channel_index = dtr.channel_index
                     AND dt.start_time >= dtr.start_time_minus_3
                     AND dt.start_time <= dtr.start_time
                    )
        GROUP BY dtr.case_id, dtr.channel_index, dtr.start_time) su_qry
       INNER JOIN
       (-- first sample time of every channel
        SELECT   case_id, channel_index, MIN (start_time) AS first_time
            FROM data_table
        GROUP BY case_id, channel_index) dtf
       ON (su_qry.case_id = dtf.case_id AND su_qry.channel_index = dtf.channel_index)
       INNER JOIN data_table dt2
       -- match on start_time as well, otherwise every window is repeated for
       -- every row of the channel
       ON (    su_qry.case_id = dt2.case_id
           AND su_qry.channel_index = dt2.channel_index
           AND su_qry.start_time = dt2.start_time)
 WHERE DATEADD (mi, 3, dtf.first_time) <= dt2.start_time

这不一定100％正确，但我认为它应该能给出你想要的结果。基本上，我们通过一个带有大于等于和小于等于条件的自连接，找到数据表中每一行之前3分钟内的最小值和最大值。然后把这些结果与“first_time”的计算结果连接起来，最后再连接到主表，以便在WHERE谓词中进行过滤。

Answer 2

如果我理解正确，您需要以下

对于每个case_id，channel_index组合：

找到所有3分钟窗口的最低MAX值（持续最小值）
查找所有3分钟窗口的最高MIN值（持续最大值）。
使用之前3分钟的数据。如果自第一个（MIN）start_time值以来尚未过去3分钟，请排除该数据。

Oracle查询和您的解决方案（存储过程和CLR存储过程）之间仍存在一些无法解释的差异：

Oracle查询不能确保每个窗口的时差恰好是3分钟。它只取之前3分钟内的最小/最大值。WHERE子句first_time + numtodsinterval(3, 'minute') <= start_time会排除在最初3分钟内结束的时间窗口。
value_duration列位于示例数据中，但未在解决方案中使用
示例数据不包含3分钟的数据，因此我将时间范围更改为10秒
您没有列出样本数据的预期结果

解决方案 - 这可能不是最快的解决方案，但应该可行：

步骤0 ：窗口时间范围 - 样本数据不包含3分钟的数据，因此我使用变量来保存窗口时间范围所需的秒数。对于实际数据，您可以使用180秒。

-- Width of the look-back window in seconds (10 for the sample data).
DECLARE @seconds INT;
SET @seconds = 10;

第1步：first_time（起始时间）- 虽然first_time本身并不重要，但仍有必要用它来确保我们不包含不完整的时间段。稍后将使用它来排除第一个完整时间段结束之前的数据。

-- Per (case_id, channel_index): the earliest and latest sample time, plus
-- range_time = earliest sample + @seconds, i.e. the first instant at which a
-- complete window is available.
SELECT
    cd.case_id,
    cd.channel_index,
    MIN(cd.start_time)                            AS first_time,
    DATEADD(second, @seconds, MIN(cd.start_time)) AS range_time,
    MAX(cd.start_time)                            AS last_time
FROM #continuous_data AS cd
GROUP BY
    cd.case_id,
    cd.channel_index
ORDER BY
    cd.case_id,
    cd.channel_index;

-- Results from the sample data
case_id     channel_index first_time              range_time              last_time
----------- ------------- ----------------------- ----------------------- -----------------------
2081        50            2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 2011-05-18 09:37:08.000
2081        51            2011-05-18 09:36:34.000 2011-05-18 09:36:44.000 2011-05-18 09:37:04.000

第2步：时间窗 - Oracle查询使用partition by case_id, channel_index order by start_time range numtodsinterval(3, 'minute') preceding在子查询中查找dms_value的最小值和最大值以及first_time。由于SQL Server不支持这种按时间间隔定义的range窗口，因此您需要使用子查询来定义3分钟的窗口。Oracle查询使用的是range ... preceding，因此在SQL Server中要用带负值的DATEADD来计算窗口的起点：

-- One look-back window per sample row: it ends at the row's start_time and
-- begins @seconds earlier.
SELECT
    cd.case_id,
    cd.channel_index,
    DATEADD(second, -@seconds, cd.start_time) AS window_start,
    cd.start_time                             AS window_end
FROM #continuous_data AS cd
ORDER BY
    cd.case_id,
    cd.channel_index,
    cd.start_time;

步骤3 ：时间窗口的MIN / MAX - 接下来，您需要找到每个窗口的最小值和最大值。大部分计算都在这一步完成，这也是最需要调试才能得到预期结果的地方。

-- Extremes of dms_value for every window (one window per sample row).
-- time_min / time_max / time_diff expose the span actually covered by the
-- samples inside the window and are only there to help debugging.
SELECT
    smp.case_id,
    smp.channel_index,
    w.window_end,
    MAX(smp.dms_value)  AS dms_max,
    MIN(smp.dms_value)  AS dms_min,
    MIN(smp.start_time) AS time_min,
    MAX(smp.start_time) AS time_max,
    DATEDIFF(second, MIN(smp.start_time), MAX(smp.start_time)) AS time_diff
FROM #continuous_data AS smp
INNER JOIN (
    -- window bounds: [start_time - @seconds, start_time] for each row
    SELECT
        case_id,
        channel_index,
        DATEADD(second, -@seconds, start_time) AS window_start,
        start_time                             AS window_end
    FROM #continuous_data
) AS w
    ON  w.case_id       = smp.case_id
    AND w.channel_index = smp.channel_index
    -- both bounds are inclusive, same as BETWEEN
    AND smp.start_time >= w.window_start
    AND smp.start_time <= w.window_end
INNER JOIN (
    -- first instant at which a channel has a complete window
    SELECT
        case_id,
        channel_index,
        DATEADD(second, @seconds, MIN(start_time)) AS range_time
    FROM #continuous_data
    GROUP BY case_id, channel_index
) AS f
    ON  f.case_id       = smp.case_id
    AND f.channel_index = smp.channel_index
    -- drop windows that end before the first complete period
    AND w.window_end   >= f.range_time
GROUP BY smp.case_id, smp.channel_index, w.window_end
ORDER BY smp.case_id, smp.channel_index, w.window_end;

-- Results from sample data:
case_id     channel_index window_end              dms_max                dms_min                time_min                time_max                time_diff
----------- ------------- ----------------------- ---------------------- ---------------------- ----------------------- ----------------------- -----------
2081        50            2011-05-18 09:36:49.000 104.5625               94.8125                2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 10
2081        50            2011-05-18 09:36:50.000 105.8125               95.4375                2011-05-18 09:36:40.000 2011-05-18 09:36:50.000 10
2081        50            2011-05-18 09:36:52.000 107.125                98.0625                2011-05-18 09:36:42.000 2011-05-18 09:36:52.000 10
2081        50            2011-05-18 09:36:53.000 108.4375               99.3125                2011-05-18 09:36:44.000 2011-05-18 09:36:53.000 9
2081        50            2011-05-18 09:36:54.000 109.75                 99.3125                2011-05-18 09:36:44.000 2011-05-18 09:36:54.000 10
2081        50            2011-05-18 09:36:55.000 111.0625               100.625                2011-05-18 09:36:45.000 2011-05-18 09:36:55.000 10
2081        50            2011-05-18 09:36:57.000 112.3125               103.25                 2011-05-18 09:36:48.000 2011-05-18 09:36:57.000 9
2081        50            2011-05-18 09:36:58.000 113.625                103.25                 2011-05-18 09:36:48.000 2011-05-18 09:36:58.000 10
2081        50            2011-05-18 09:36:59.000 114.9375               104.5625               2011-05-18 09:36:49.000 2011-05-18 09:36:59.000 10
2081        50            2011-05-18 09:37:01.000 116.25                 107.125                2011-05-18 09:36:52.000 2011-05-18 09:37:01.000 9
2081        50            2011-05-18 09:37:02.000 117.5                  107.125                2011-05-18 09:36:52.000 2011-05-18 09:37:02.000 10
2081        50            2011-05-18 09:37:03.000 118.8125               108.4375               2011-05-18 09:36:53.000 2011-05-18 09:37:03.000 10
2081        50            2011-05-18 09:37:05.000 120.125                111.0625               2011-05-18 09:36:55.000 2011-05-18 09:37:05.000 10
2081        50            2011-05-18 09:37:06.000 121.4375               112.3125               2011-05-18 09:36:57.000 2011-05-18 09:37:06.000 9
2081        50            2011-05-18 09:37:07.000 122.75                 112.3125               2011-05-18 09:36:57.000 2011-05-18 09:37:07.000 10
2081        50            2011-05-18 09:37:08.000 124.0625               113.625                2011-05-18 09:36:58.000 2011-05-18 09:37:08.000 10
2081        51            2011-05-18 09:36:46.000 98                     96                     2011-05-18 09:36:40.000 2011-05-18 09:36:46.000 6
2081        51            2011-05-18 09:36:52.000 98                     92                     2011-05-18 09:36:46.000 2011-05-18 09:36:52.000 6
2081        51            2011-05-18 09:36:58.000 92                     86                     2011-05-18 09:36:52.000 2011-05-18 09:36:58.000 6
2081        51            2011-05-18 09:37:04.000 86                     80                     2011-05-18 09:36:58.000 2011-05-18 09:37:04.000 6

第4步：最后，您可以将它们放在一起，以返回每个时间窗口的最低MAX值和最高MIN值：

-- Sustained values per (case_id, channel_index):
--   su_min = lowest  per-window maximum
--   su_max = highest per-window minimum
SELECT
    per_window.case_id,
    per_window.channel_index,
    MIN(per_window.dms_max) AS su_min,
    MAX(per_window.dms_min) AS su_max
FROM (
    -- extremes of dms_value inside every complete window
    SELECT
        smp.case_id,
        smp.channel_index,
        w.window_end,
        MAX(smp.dms_value) AS dms_max,
        MIN(smp.dms_value) AS dms_min
    FROM #continuous_data AS smp
    INNER JOIN (
        -- window bounds: [start_time - @seconds, start_time] for each row
        SELECT
            case_id,
            channel_index,
            DATEADD(second, -@seconds, start_time) AS window_start,
            start_time                             AS window_end
        FROM #continuous_data
    ) AS w
        ON  w.case_id       = smp.case_id
        AND w.channel_index = smp.channel_index
        -- both bounds are inclusive, same as BETWEEN
        AND smp.start_time >= w.window_start
        AND smp.start_time <= w.window_end
    INNER JOIN (
        -- first instant at which a channel has a complete window
        SELECT
            case_id,
            channel_index,
            DATEADD(second, @seconds, MIN(start_time)) AS range_time
        FROM #continuous_data
        GROUP BY case_id, channel_index
    ) AS f
        ON  f.case_id       = smp.case_id
        AND f.channel_index = smp.channel_index
        -- drop windows that end before the first complete period
        AND w.window_end   >= f.range_time
    GROUP BY smp.case_id, smp.channel_index, w.window_end
) AS per_window
GROUP BY per_window.case_id, per_window.channel_index
ORDER BY per_window.case_id, per_window.channel_index;

-- Results from sample data:
case_id     channel_index su_min                 su_max
----------- ------------- ---------------------- ----------------------
2081        50            104.5625               113.625
2081        51            86                     96

Answer 3

好的，所以这里有一个解决问题的CLR存储过程。这将在大约3:05（分钟）内从包含110万条记录的案例中返回持续的最小/最大值。请告诉我是否有一种简单的T-SQL方法来实现这一点，因为我宁愿不走这条路。但是，关于如何提高速度的评论也将受到赞赏。

/// <summary>
/// SQL CLR stored procedures for sustained min/max calculations on continuous_data.
/// </summary>
public partial class StoredProcedures
{
/// <summary>
/// Streams the rows of one case ordered by channel and start time, keeps a
/// sliding queue of recent rows per channel, and sends one message per channel
/// with its sustained minimum (lowest window maximum) and sustained maximum
/// (highest window minimum), followed by row/recalculation counters.
/// </summary>
/// <param name="caseId">case_id whose rows are read from continuous_data.</param>
/// <param name="seconds">
/// Minimum span, in seconds, between the oldest queued row and the current row
/// before the window is evaluated.
/// </param>
[Microsoft.SqlServer.Server.SqlProcedure]
public static void ComputeCaseSustainedChannelValues(int caseId, int seconds)
{
    // The case id is bound as a parameter instead of being formatted into the text.
    const string query =
        "Select channel_index, start_time, dms_value, value_duration from continuous_data " +
        "where case_id = @caseId and dms_type = 0 and error_code is NULL " +
        "order by channel_index, start_time";

    // using blocks dispose the connection and command on every path; no
    // throw-away placeholder instances are created.
    using (SqlConnection con = new SqlConnection("context connection=true"))
    using (SqlCommand cmd = new SqlCommand(query, con))
    {
        cmd.Parameters.AddWithValue("@caseId", caseId);
        con.Open();

        Queue<ContinuousData> window = new Queue<ContinuousData>();
        float? sus_min = null, sus_max = null;
        float? min = null, max = null;
        int currentChannel = -1;
        bool recalc = true;
        int recalccounter = 0;
        int rowcounter = 0;

        using (SqlDataReader reader = cmd.ExecuteReader())
        {
            while (reader.Read())
            {
                var cd = new ContinuousData
                    {
                        // Convert handles both smallint and int columns;
                        // GetInt16 throws when the column is declared as int.
                        ChannelIndex = Convert.ToInt32(reader.GetValue(0)),
                        StartTime = reader.GetDateTime(1),
                        DmsValue = (float)reader.GetSqlDouble(2),
                        Duration = reader.GetInt16(3)
                    };

                // check to make sure we are on the same channel. If not 
                // clear the queue and start over with the new channel
                if (currentChannel != cd.ChannelIndex)
                {
                    if (currentChannel != -1)
                    {
                        SqlContext.Pipe.Send(String.Format("Channel: {0}  Min: {1}  Max: {2}", currentChannel, sus_min, sus_max));
                    }
                    currentChannel = cd.ChannelIndex;
                    window.Clear();
                    sus_max = null;
                    sus_min = null;
                    recalc = true;
                }
                rowcounter++;
                window.Enqueue(cd);

                if (cd.StartTime.Subtract(window.Peek().StartTime).TotalSeconds >= seconds)
                {
                    if (recalc)
                    {
                        recalccounter++;
                        // the running min/max cannot be trusted, rescan the whole window
                        MinMax(window.ToArray(), out min, out max);
                        recalc = false;
                    }
                    else
                    {
                        // update the rolling min max based on the new value coming in
                        max = max == null || cd.DmsValue > max ? cd.DmsValue : max;
                        min = min == null || cd.DmsValue < min ? cd.DmsValue : min;
                    }

                    // update the sustained min max based on the current window's min max
                    sus_min = sus_min == null || max < sus_min ? max : sus_min;
                    sus_max = sus_max == null || min > sus_max ? min : sus_max;

                    // now that we calculated remove the first item
                    var firstitem = window.Dequeue();
                    if (firstitem.DmsValue == sus_min || firstitem.DmsValue == sus_max ||
                        firstitem.DmsValue == min || firstitem.DmsValue == max)
                    {
                        recalc = true;
                    }
                }
                else
                {
                    // The row was queued without being folded into min/max, so
                    // the running values are stale; force a rescan the next
                    // time the window is evaluated.
                    recalc = true;
                }
            }
        }

        if (sus_max != null && sus_min != null)
        {
            SqlContext.Pipe.Send(String.Format("Channel: {0}  Min: {1}  Max: {2}", currentChannel, sus_min, sus_max));
        }

        SqlContext.Pipe.Send(String.Format("Rows: {0}, Recalcs performed: {1}", rowcounter, recalccounter));
        SqlContext.Pipe.Send("Done!");
    }
}

/// <summary>
/// Scans the given rows and returns the smallest and largest DmsValue.
/// Both outputs are null when the array is null or empty.
/// </summary>
private static void MinMax(ContinuousData[] cd, out float? min, out float? max)
{
    min = null;
    max = null;
    if (cd == null || cd.Length == 0)
        return;

    min = cd[0].DmsValue;
    max = cd[0].DmsValue;

    // element 0 already seeded min/max
    for (int i = 1; i < cd.Length; i++)
    {
        if (min > cd[i].DmsValue)
            min = cd[i].DmsValue;
        if (max < cd[i].DmsValue)
            max = cd[i].DmsValue;
    }
}

/// <summary>One row read from continuous_data.</summary>
public class ContinuousData
{
    public int ChannelIndex { get; set; }
    public DateTime StartTime { get; set; }
    public float DmsValue { get; set; }
    public int Duration { get; set; }
}

/// <summary>
/// Sustained min/max of one channel. Not referenced by the code in this
/// class; kept because it is public.
/// </summary>
public class ChannelValues
{
    public int ChannelIndex { get; set; }
    public float SustainedMin { get; set; }
    public float SustainedMax { get; set; }
}
};

在SQL Server中复制Oracles范围窗口函数的最佳方法

3 个答案: