我需要在SQL Server中执行此Oracle查询:
-- Sustained min/max per (case_id, channel_index), Oracle dialect.
--   su_max        = lowest dms_value seen in the 3 minutes up to each row
--   su_min        = highest dms_value seen in the 3 minutes up to each row
--   sustained_max = highest of the per-window lows
--   sustained_min = lowest of the per-window highs
-- Rows earlier than 3 minutes after the channel's first sample are skipped so
-- that only windows spanning a full 3 minutes contribute.
-- Fixes: the windowing keyword is PRECEDING (not "preceeding"); Oracle does not
-- accept AS before a table alias; the ORDER BY inside the inline view was
-- dropped because the outer aggregation does not depend on row order.
select case_id,
       channel_index,
       min(su_min) as sustained_min,
       max(su_max) as sustained_max
from (
    select case_id,
           channel_index,
           start_time,
           min(dms_value) over (partition by case_id, channel_index order by start_time
                                range numtodsinterval(3, 'minute') preceding) as su_max,
           max(dms_value) over (partition by case_id, channel_index order by start_time
                                range numtodsinterval(3, 'minute') preceding) as su_min,
           min(start_time) over (partition by case_id, channel_index order by start_time)
               as first_time
    from data_table
) su_data
where first_time + numtodsinterval(3, 'minute') <= start_time
group by case_id,
         channel_index
以下是我用基本T-SQL实现的尝试,但是当某个案例有超过100万条记录时,查询运行了37分钟仍未完成(之后我取消了查询):
-- GetSustainedValues
-- Returns one row per channel of the given case with the sustained extremes:
--   [max] = highest value, over all qualifying windows, of the window MINIMUM
--   [min] = lowest value, over all qualifying windows, of the window MAXIMUM
-- A window is anchored at each sample row and covers the samples whose
-- start_time lies in [start_time - (@time_limit - 1) seconds, start_time];
-- it qualifies only when the value_duration of those samples sums to at
-- least @time_limit.
--
-- Parameters:
--   @case_id     case whose continuous_data rows are analysed
--   @time_limit  window length in seconds
--   @bypass_only accepted for compatibility with existing callers; not used
--
-- Result set: case_id, channel_index, max, min (max/min are NULL for a channel
-- that has no qualifying window).
--
-- Changes from the cursor version:
--   * The cursor only wrote a channel's result when the NEXT channel started,
--     so the last (or only) channel was never returned. Every channel is
--     returned now.
--   * The per-row cursor plus one lookup query per row is replaced by a single
--     set-based statement.
-- NOTE(review): an index on continuous_data (case_id, channel_index, start_time)
-- covering dms_value and value_duration should help the window lookup - confirm
-- against the actual execution plan.
ALTER procedure [dbo].[GetSustainedValues](
@case_id int,
@time_limit int,
@bypass_only bit = NULL)
as
begin
    SELECT
        cd.case_id,
        cd.channel_index,
        MAX(win.window_min) AS [max],
        MIN(win.window_max) AS [min]
    FROM continuous_data AS cd
    -- OUTER APPLY keeps anchor rows whose window does not qualify (HAVING
    -- filters the aggregate row away); they contribute NULLs, which MIN/MAX ignore.
    OUTER APPLY (
        SELECT
            MAX(w.dms_value) AS window_max,
            MIN(w.dms_value) AS window_min
        FROM continuous_data AS w
        WHERE w.case_id = cd.case_id
          AND w.channel_index = cd.channel_index
          -- inclusive on both ends, same bounds as the original lookup
          AND w.start_time BETWEEN DATEADD(s, -(@time_limit - 1), cd.start_time) AND cd.start_time
        HAVING SUM(w.value_duration) >= @time_limit
    ) AS win
    WHERE cd.case_id = @case_id
    GROUP BY cd.case_id, cd.channel_index
    ORDER BY cd.channel_index
end
这是使用CLR存储过程的好地方吗?还有其他任何想法可以使这个查询更有效吗?
编辑3-9-2012: 不要专注于“first_time”字段。它的作用是确保只统计数据集开始3分钟之后的窗口(即完整的3分钟窗口)。在我的查询中,我不关心first_time。我需要的是每个通道在所有3分钟周期内的最小/最大持续值。
以下是一些包含2个频道的示例数据。请注意,每个样本的持续时间并不总是相同:
-- Temp table holding the sample readings used by the examples.
-- One row per reading of a channel within a case.
-- Fix: removed the stray comma after "dms_value float", which made the
-- original statement a syntax error.
CREATE TABLE #continuous_data
(
  case_id int
, channel_index int
, start_time datetime      -- when the reading starts
, dms_value float          -- the measured value
, value_duration smallint  -- presumably how long the reading lasts, in seconds - confirm
)
-- Sample readings for case 2081: channel 50 (1-2 second readings) and
-- channel 51 (6 second readings). Columns are listed explicitly so the
-- statements do not depend on the table's column order.
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:34.000', 90, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:39.000', 94.8125, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:40.000', 95.4375, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:40.000', 96, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:41.000', 96.75, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:42.000', 98.0625, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:44.000', 99.3125, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:45.000', 100.625, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:46.000', 101.9375, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:46.000', 98, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:48.000', 103.25, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:49.000', 104.5625, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:50.000', 105.8125, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:52.000', 107.125, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:52.000', 92, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:53.000', 108.4375, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:54.000', 109.75, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:55.000', 111.0625, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:57.000', 112.3125, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:58.000', 113.625, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:36:58.000', 86, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:36:59.000', 114.9375, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:01.000', 116.25, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:02.000', 117.5, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:03.000', 118.8125, 2);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 51, '2011-05-18 09:37:04.000', 80, 6);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:05.000', 120.125, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:06.000', 121.4375, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:07.000', 122.75, 1);
INSERT INTO #continuous_data (case_id, channel_index, start_time, dms_value, value_duration) VALUES (2081, 50, '2011-05-18 09:37:08.000', 124.0625, 1);
答案 0 :(得分:0)
如果您要执行以下操作会怎么样?
-- For every row of data_table (treated as the END of a window) compute the
-- MIN and MAX dms_value over the 3 minutes up to and including that row, then
-- keep only windows that end at least 3 minutes after the channel's first row.
-- Output: case_id, channel_index, first_time, su_min, su_max (one row per window).
-- Fixes applied to the original sketch:
--   * T-SQL DATEADD takes (datepart, number, date) with an unquoted datepart.
--   * The upper window bound compared against an ambiguous "start_time"; it
--     is now dtr.start_time, the window end.
--   * MIN(start_time) is aliased first_time, which the outer query references.
--   * Grouping is by the window end (dtr), not by the contributing row (dt),
--     which had made every row its own one-row "window".
--   * The extra join back to data_table repeated each window once per row of
--     the channel; the full-window filter now tests the window end directly.
SELECT su_qry.case_id,
       su_qry.channel_index,
       dtf.first_time,
       su_qry.su_min,
       su_qry.su_max
FROM (
        SELECT dtr.case_id,
               dtr.channel_index,
               dtr.start_time,
               MIN(dt.dms_value) AS su_min,
               MAX(dt.dms_value) AS su_max
        FROM (
                SELECT case_id,
                       channel_index,
                       start_time,
                       DATEADD(mi, -3, start_time) AS start_time_minus_3
                FROM data_table
             ) AS dtr
        INNER JOIN data_table AS dt
            ON  dt.case_id = dtr.case_id
            AND dt.channel_index = dtr.channel_index
            AND dt.start_time >= dtr.start_time_minus_3
            AND dt.start_time <= dtr.start_time
        GROUP BY dtr.case_id, dtr.channel_index, dtr.start_time
     ) AS su_qry
INNER JOIN (
        SELECT case_id,
               channel_index,
               MIN(start_time) AS first_time
        FROM data_table
        GROUP BY case_id, channel_index
     ) AS dtf
    ON  su_qry.case_id = dtf.case_id
    AND su_qry.channel_index = dtf.channel_index
WHERE DATEADD(mi, 3, dtf.first_time) <= su_qry.start_time
这不一定100%正确,但我认为它可能会给你想要的结果。基本上,我们通过“大于等于”和“小于等于”条件的连接,为数据表中的每一行找到过去3分钟内的最小值和最大值。然后将这些结果与“first_time”(首次时间)的计算结果连接,最后在WHERE谓词中按主表的start_time进行过滤。
答案 1 :(得分:0)
如果我理解正确,您需要以下
对于每个case_id,channel_index组合:
如果自第一个 start_time 值以来尚未过去3分钟,请排除该数据。
Oracle查询和您的解决方案(存储过程和CLR存储过程)之间仍存在一些无法解释的差异:
first_time + numtodsinterval(3, 'minute') <= start_time 会排除在最初3分钟之内结束的时间窗口。
value_duration 列存在于示例数据中,但未在解决方案中使用。
解决方案 - 这可能不是最快的解决方案,但应该可行:
步骤0 :窗口时间范围 - 样本数据不包含3分钟的数据,因此我使用变量来保存窗口时间范围所需的秒数。对于实际数据,您可以使用180秒。
-- Window length in seconds (10 for the short sample data set).
DECLARE @seconds AS int;
SELECT @seconds = 10;
第1步:首次时间(first_time) - 虽然 first_time 本身并不重要,但仍需要用它来确保我们不包含不完整的时间段。稍后将用它排除第一个完整时间段结束之前的数据。
-- Per (case_id, channel_index): the earliest sample time (first_time), the
-- moment the first complete @seconds-long period ends (range_time), and the
-- latest sample time (last_time).
SELECT
    cd.case_id,
    cd.channel_index,
    MIN(cd.start_time) AS first_time,
    DATEADD(second, @seconds, MIN(cd.start_time)) AS range_time,
    MAX(cd.start_time) AS last_time
FROM #continuous_data AS cd
GROUP BY
    cd.case_id,
    cd.channel_index
ORDER BY
    cd.case_id,
    cd.channel_index
-- Results from the sample data
case_id channel_index first_time range_time last_time
----------- ------------- ----------------------- ----------------------- -----------------------
2081 50 2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 2011-05-18 09:37:08.000
2081 51 2011-05-18 09:36:34.000 2011-05-18 09:36:44.000 2011-05-18 09:37:04.000
第2步:时间窗口 - Oracle查询在子查询中使用 partition by case_id, channel_index order by start_time range numtodsinterval(3, 'minute') preceding 来查找最小和最大的 dms_value 以及 first_time。由于SQL Server不支持这种按时间间隔的 range 窗口,因此您需要使用子查询来定义3分钟的窗口。Oracle查询使用的是 range ... preceding,因此在SQL Server中要用带负值的 DATEADD 来得到窗口的起点:
-- One window per sample row: it ends at the row's start_time and begins
-- @seconds earlier.
SELECT
    cd.case_id,
    cd.channel_index,
    DATEADD(second, -@seconds, cd.start_time) AS window_start,
    cd.start_time AS window_end
FROM #continuous_data AS cd
ORDER BY
    cd.case_id,
    cd.channel_index,
    cd.start_time
步骤3 :时间窗口的MIN / MAX - 接下来,您需要找到每个窗口的最小值和最大值。大部分计算都在这一步完成,也是最需要调试才能得到预期结果的地方。
-- Highest and lowest dms_value inside every window, per channel.
-- time_min / time_max / time_diff expose the span of samples actually found
-- in each window and are there purely as a debugging aid.
SELECT src.case_id
     , src.channel_index
     , w.window_end
     , MAX(src.dms_value) AS dms_max
     , MIN(src.dms_value) AS dms_min
     , MIN(src.start_time) AS time_min
     , MAX(src.start_time) AS time_max
     , DATEDIFF(second, MIN(src.start_time), MAX(src.start_time)) AS time_diff
FROM #continuous_data AS src
INNER JOIN (
        -- every sample row closes a window that opened @seconds earlier
        SELECT case_id
             , channel_index
             , DATEADD(second, -@seconds, start_time) AS window_start
             , start_time AS window_end
        FROM #continuous_data
     ) AS w
    ON  w.case_id = src.case_id
    AND w.channel_index = src.channel_index
    -- a sample belongs to the window when it lies inside it (both ends inclusive)
    AND src.start_time >= w.window_start
    AND src.start_time <= w.window_end
INNER JOIN (
        -- end of the first complete period of each channel
        SELECT case_id
             , channel_index
             , DATEADD(second, @seconds, MIN(start_time)) AS range_time
        FROM #continuous_data
        GROUP BY case_id, channel_index
     ) AS f
    ON  f.case_id = src.case_id
    AND f.channel_index = src.channel_index
    -- ignore windows that close before the first complete period has elapsed
    AND w.window_end >= f.range_time
GROUP BY src.case_id, src.channel_index, w.window_end
ORDER BY src.case_id, src.channel_index, w.window_end
-- Results from sample data:
case_id channel_index window_end dms_max dms_min time_min time_max time_diff
----------- ------------- ----------------------- ---------------------- ---------------------- ----------------------- ----------------------- -----------
2081 50 2011-05-18 09:36:49.000 104.5625 94.8125 2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 10
2081 50 2011-05-18 09:36:50.000 105.8125 95.4375 2011-05-18 09:36:40.000 2011-05-18 09:36:50.000 10
2081 50 2011-05-18 09:36:52.000 107.125 98.0625 2011-05-18 09:36:42.000 2011-05-18 09:36:52.000 10
2081 50 2011-05-18 09:36:53.000 108.4375 99.3125 2011-05-18 09:36:44.000 2011-05-18 09:36:53.000 9
2081 50 2011-05-18 09:36:54.000 109.75 99.3125 2011-05-18 09:36:44.000 2011-05-18 09:36:54.000 10
2081 50 2011-05-18 09:36:55.000 111.0625 100.625 2011-05-18 09:36:45.000 2011-05-18 09:36:55.000 10
2081 50 2011-05-18 09:36:57.000 112.3125 103.25 2011-05-18 09:36:48.000 2011-05-18 09:36:57.000 9
2081 50 2011-05-18 09:36:58.000 113.625 103.25 2011-05-18 09:36:48.000 2011-05-18 09:36:58.000 10
2081 50 2011-05-18 09:36:59.000 114.9375 104.5625 2011-05-18 09:36:49.000 2011-05-18 09:36:59.000 10
2081 50 2011-05-18 09:37:01.000 116.25 107.125 2011-05-18 09:36:52.000 2011-05-18 09:37:01.000 9
2081 50 2011-05-18 09:37:02.000 117.5 107.125 2011-05-18 09:36:52.000 2011-05-18 09:37:02.000 10
2081 50 2011-05-18 09:37:03.000 118.8125 108.4375 2011-05-18 09:36:53.000 2011-05-18 09:37:03.000 10
2081 50 2011-05-18 09:37:05.000 120.125 111.0625 2011-05-18 09:36:55.000 2011-05-18 09:37:05.000 10
2081 50 2011-05-18 09:37:06.000 121.4375 112.3125 2011-05-18 09:36:57.000 2011-05-18 09:37:06.000 9
2081 50 2011-05-18 09:37:07.000 122.75 112.3125 2011-05-18 09:36:57.000 2011-05-18 09:37:07.000 10
2081 50 2011-05-18 09:37:08.000 124.0625 113.625 2011-05-18 09:36:58.000 2011-05-18 09:37:08.000 10
2081 51 2011-05-18 09:36:46.000 98 96 2011-05-18 09:36:40.000 2011-05-18 09:36:46.000 6
2081 51 2011-05-18 09:36:52.000 98 92 2011-05-18 09:36:46.000 2011-05-18 09:36:52.000 6
2081 51 2011-05-18 09:36:58.000 92 86 2011-05-18 09:36:52.000 2011-05-18 09:36:58.000 6
2081 51 2011-05-18 09:37:04.000 86 80 2011-05-18 09:36:58.000 2011-05-18 09:37:04.000 6
第4步:最后,您可以将它们组合在一起,返回每个通道在所有时间窗口中最低的MAX值和最高的MIN值:
-- Sustained values per channel:
--   su_min = the lowest of the per-window maxima
--   su_max = the highest of the per-window minima
SELECT per_window.case_id
     , per_window.channel_index
     , MIN(per_window.dms_max) AS su_min
     , MAX(per_window.dms_min) AS su_max
FROM (
        -- max/min dms_value of every window, one row per window end
        SELECT src.case_id
             , src.channel_index
             , w.window_end
             , MAX(src.dms_value) AS dms_max
             , MIN(src.dms_value) AS dms_min
        FROM #continuous_data AS src
        INNER JOIN (
                -- every sample row closes a window that opened @seconds earlier
                SELECT case_id
                     , channel_index
                     , DATEADD(second, -@seconds, start_time) AS window_start
                     , start_time AS window_end
                FROM #continuous_data
             ) AS w
            ON  w.case_id = src.case_id
            AND w.channel_index = src.channel_index
            AND src.start_time >= w.window_start
            AND src.start_time <= w.window_end
        INNER JOIN (
                -- end of the first complete period of each channel
                SELECT case_id
                     , channel_index
                     , DATEADD(second, @seconds, MIN(start_time)) AS range_time
                FROM #continuous_data
                GROUP BY case_id, channel_index
             ) AS f
            ON  f.case_id = src.case_id
            AND f.channel_index = src.channel_index
            -- ignore windows that close before the first complete period
            AND w.window_end >= f.range_time
        GROUP BY src.case_id, src.channel_index, w.window_end
     ) AS per_window
GROUP BY per_window.case_id, per_window.channel_index
ORDER BY per_window.case_id, per_window.channel_index
-- Results from sample data:
case_id channel_index su_min su_max
----------- ------------- ---------------------- ----------------------
2081 50 104.5625 113.625
2081 51 86 96
答案 2 :(得分:0)
好的,这里有一个解决该问题的CLR存储过程。对于包含110万条记录的案例,它大约需要3分05秒返回持续的最小/最大值。如果有简单的T-SQL方法可以实现这一点,请告诉我,因为我宁愿不走这条路。不过,关于如何提高速度的意见也同样欢迎。
/// <summary>
/// SQLCLR stored procedures for computing sustained channel values.
/// </summary>
public partial class StoredProcedures
{
    /// <summary>
    /// Streams the rows of one case ordered by channel and start time, keeps a
    /// sliding queue of samples per channel and, whenever the queue spans at
    /// least <paramref name="seconds"/>, folds the queue's max/min into the
    /// channel's sustained min (lowest window max) and sustained max (highest
    /// window min). One message per channel is sent through the SQL pipe,
    /// followed by row/recalculation counters and "Done!".
    /// </summary>
    /// <param name="caseId">Value matched against continuous_data.case_id.</param>
    /// <param name="seconds">Minimum span, in seconds, between the oldest queued
    /// sample and the current one for the window to be evaluated.</param>
    [Microsoft.SqlServer.Server.SqlProcedure]
    public static void ComputeCaseSustainedChannelValues(int caseId, int seconds)
    {
        // The case id is bound as a parameter instead of being formatted into the SQL text.
        const string query =
            "Select channel_index, start_time, dms_value, value_duration from continuous_data " +
            "where case_id = @case_id and dms_type = 0 and error_code is NULL " +
            "order by channel_index, start_time";

        // using-blocks dispose the connection, command and reader on every path.
        // The original created placeholder SqlConnection/SqlCommand objects that
        // were overwritten and never disposed.
        using (SqlConnection con = new SqlConnection("context connection=true"))
        using (SqlCommand cmd = new SqlCommand(query, con))
        {
            cmd.Parameters.AddWithValue("@case_id", caseId);
            con.Open();

            Queue<ContinuousData> window = new Queue<ContinuousData>();
            float? sus_min = null, sus_max = null;   // sustained values of the current channel
            float? min = null, max = null;           // rolling min/max of the current window
            int currentChannel = -1;
            bool recalc = true;                      // true => rolling min/max must be rebuilt
            int recalccounter = 0;
            int rowcounter = 0;

            using (SqlDataReader reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    // NOTE(review): GetInt16 performs no conversion, so this assumes
                    // channel_index and value_duration are smallint columns - confirm.
                    var cd = new ContinuousData
                    {
                        ChannelIndex = reader.GetInt16(0),
                        StartTime = reader.GetDateTime(1),
                        DmsValue = (float)reader.GetSqlDouble(2),
                        Duration = reader.GetInt16(3)
                    };

                    // A new channel starts: report the previous one and reset all state.
                    if (currentChannel != cd.ChannelIndex)
                    {
                        if (currentChannel != -1)
                        {
                            SqlContext.Pipe.Send(String.Format("Channel: {0} Min: {1} Max: {2}", currentChannel, sus_min, sus_max));
                        }
                        currentChannel = cd.ChannelIndex;
                        window.Clear();
                        sus_max = null;
                        sus_min = null;
                        recalc = true;
                    }

                    rowcounter++;
                    window.Enqueue(cd);

                    if (cd.StartTime.Subtract(window.Peek().StartTime).TotalSeconds >= seconds)
                    {
                        if (recalc)
                        {
                            recalccounter++;
                            // The rolling values are not trustworthy: rebuild them from the whole queue.
                            MinMax(window.ToArray(), out min, out max);
                            recalc = false;
                        }
                        else
                        {
                            // Only the newest sample can have changed the rolling min/max.
                            max = max == null || cd.DmsValue > max ? cd.DmsValue : max;
                            min = min == null || cd.DmsValue < min ? cd.DmsValue : min;
                        }

                        // Sustained min is the lowest window max; sustained max the highest window min.
                        sus_min = sus_min == null || max < sus_min ? max : sus_min;
                        sus_max = sus_max == null || min > sus_max ? min : sus_max;

                        // Slide the window forward by dropping its oldest sample.
                        var firstitem = window.Dequeue();
                        if (firstitem.DmsValue == sus_min || firstitem.DmsValue == sus_max ||
                            firstitem.DmsValue == min || firstitem.DmsValue == max)
                        {
                            // The dropped sample may have defined an extreme: rebuild next time.
                            recalc = true;
                        }
                    }
                    else
                    {
                        // Fix: this sample was queued without being folded into the rolling
                        // min/max, so force a rebuild the next time the window is evaluated;
                        // otherwise its value would be silently missed.
                        recalc = true;
                    }
                    // NOTE(review): only one sample is dropped per evaluation, so with
                    // irregular sample spacing the queue can span more than `seconds`.
                    // Confirm whether the window should be trimmed to exactly `seconds`.
                }
            }

            if (sus_max != null && sus_min != null)
            {
                SqlContext.Pipe.Send(String.Format("Channel: {0} Min: {1} Max: {2}", currentChannel, sus_min, sus_max));
            }
            SqlContext.Pipe.Send(String.Format("Rows: {0}, Recalcs performed: {1}", rowcounter, recalccounter));
            SqlContext.Pipe.Send("Done!");
        }
    }

    /// <summary>
    /// Finds the smallest and largest DmsValue in <paramref name="cd"/>.
    /// The array must contain at least one element.
    /// </summary>
    private static void MinMax(ContinuousData[] cd, out float? min, out float? max)
    {
        min = cd[0].DmsValue;
        max = cd[0].DmsValue;
        // Element 0 seeds both values, so the scan starts at 1.
        for (int i = 1; i < cd.Length; i++)
        {
            if (min > cd[i].DmsValue)
                min = cd[i].DmsValue;
            if (max < cd[i].DmsValue)
                max = cd[i].DmsValue;
        }
    }

    /// <summary>One row read from continuous_data.</summary>
    public class ContinuousData
    {
        public int ChannelIndex { get; set; }
        public DateTime StartTime { get; set; }
        public float DmsValue { get; set; }
        public int Duration { get; set; }
    }

    /// <summary>
    /// Sustained min/max of one channel. Not used by the procedure above; kept
    /// because it is part of the public surface of this class.
    /// </summary>
    public class ChannelValues
    {
        public int ChannelIndex { get; set; }
        public float SustainedMin { get; set; }
        public float SustainedMax { get; set; }
    }
}