我有一张类似于
的表格create table LOCHIST
(
RES_ID VARCHAR(10) NOT NULL,
LOC_DATE TIMESTAMP NOT NULL,
LOC_ZONE VARCHAR(10)
)
使用
等值insert into LOCHIST values(0911,2015-09-23 12:27:00.000000,SYLVSYLGA);
insert into LOCHIST values(5468,2013-02-15 13:13:24.000000,30726);
insert into LOCHIST values(23894,2013-02-15 13:12:13.000000,BECTFOUNC);
insert into LOCHIST values(24119,2013-02-15 13:12:09.000000,30363);
insert into LOCHIST values(7101,2013-02-15 13:11:37.000000,37711);
insert into LOCHIST values(26083,2013-02-15 13:11:36.000000,SHAWANDAL);
insert into LOCHIST values(24978,2013-02-15 13:11:36.000000,38132);
insert into LOCHIST values(26696,2013-02-15 13:11:27.000000,29583);
insert into LOCHIST values(5468,2013-02-15 13:11:00.000000,37760);
insert into LOCHIST values(5552,2013-02-15 13:10:55.000000,30090);
insert into LOCHIST values(24932,2013-02-15 13:10:48.000000,JBTTLITGA);
insert into LOCHIST values(23894,2013-02-15 13:10:42.000000,47263);
insert into LOCHIST values(26803,2013-02-15 13:10:25.000000,32534);
insert into LOCHIST values(24434,2013-02-15 13:10:03.000000,PLANSUFVA);
insert into LOCHIST values(26696,2013-02-15 13:10:00.000000,GEORALBGA);
insert into LOCHIST values(5468,2013-02-15 13:09:54.000000,19507);
insert into LOCHIST values(23894,2013-02-15 13:09:48.000000,37725);
这个表确实持续了数百万条记录。
每个RES_ID表示将其位置ping到LOC_ZONE的预告片的ID,然后在LOC_DATE中存储。
我想要找到的是特定位置区域中所有预告片花费的平均时间。例如,如果拖车x在loc区域PLANSUFVA中花费了4个小时,并且拖车y在loc区域PLANSUFVA中花了6个小时我想要返回
Loc Zone Avg Time
PLANSUFVA 5
无论如何都没有游标吗?
我非常感谢你的帮助。
答案 0 :(得分:1)
这需要SQL 2012:
with data
as (
select *, (case when LOC_ZONE != PREVIOUS_LOC_ZONE or PREVIOUS_LOC_ZONE is null then ROW_ID else null end) as STAY_START, (case when LOC_ZONE != NEXT_LOC_ZONE or NEXT_LOC_ZONE is null then ROW_ID else null end) as STAY_END
from (
select RES_ID, LOC_ZONE, LOC_DATE, lead(LOC_DATE, 1) over (partition by RES_ID, LOC_ZONE order by LOC_DATE) as NEXT_LOC_DATE, lag(LOC_ZONE, 1) over (partition by RES_ID order by LOC_DATE) as PREVIOUS_LOC_ZONE, lead(LOC_ZONE, 1) over (partition by RES_ID order by LOC_DATE) as NEXT_LOC_ZONE, ROW_NUMBER() over (order by RES_ID, LOC_ZONE, LOC_DATE) as ROW_ID
from LOCHIST
) t
), stays as (
select * from (
select RES_ID, LOC_ZONE, STAY_START, lead(STAY_END, 1) over (order by ROWID) as STAY_END
from (
select RES_ID, LOC_ZONE, STAY_START, STAY_END, ROW_NUMBER() over (order by RES_ID, LOC_ZONE, STAY_START desc) as ROWID
from data
where STAY_START is not null or STAY_END is not null
) t
) t
where STAY_START is not null and STAY_END is not null
)
select s.LOC_ZONE, avg(datediff(second, LOC_DATE, NEXT_LOC_DATE)) / 60 / 60 as AVG_IN_HOURS
from data d
inner join stays s on d.RES_ID = s.RES_ID and d.LOC_ZONE = s.LOC_ZONE and d.ROW_ID >= s.STAY_START and d.ROW_ID < s.STAY_END
group by s.LOC_ZONE
答案 1 :(得分:0)
要解决此问题,您需要花费在每个位置的时间。
执行此操作的一种方法是使用相关子查询。您需要对相邻值进行分组。我们的想法是找到序列中的下一个值:
select resid, min(loc_zone) as loc_zone, min(loc_date) as StartTime,
max(loc_date) as EndTime,
nextdate as NextStartTime
from (select lh.*,
(select min(loc_date) from lochist lh2
where lh2.res_id = lh.res_id and lh2.loc_zone <> lh.loc_zone and
lh2.loc_date > lh.loc_date
) as nextdate
from lochist lh
) lh
group by lh.res_id, nextdate
使用此数据,您可以获得所需的平均值。
我不清楚时间是否应该基于EndTime - StartTime
(最后记录的时间减去第一个记录的时间)或NextStartTime - startTime
(第一次在下一个位置减去第一次在这个位置)。
此外,这会为每个res_id
的最后一个位置返回NULL。你没有说明如何处理序列中的最后一个。
如果您在res_id, loc_date, loc_zone
上构建索引,它可能会运行得更快。
如果您使用的是Oracle或SQL Server 2012,则正确的查询是:
select lh.*,
lead(loc_date) over (partition by res_id order by loc_date) as nextdate
from (select lh.*,
lag(loc_zone) over (partition by res_id order by loc_date) as prevzone
from lochist lh
) lh
where prevzone is null or prevzone <> loc_zone
现在每次入住有一行,而nextdate是下一个区域的日期。
答案 2 :(得分:0)
要在不使用游标或相关子查询的情况下执行此操作,请尝试:
with rl as
(select l.*, rank() over (partition by res_id order by loc_date) rn
from lochist l),
fdr as
(select rc.*, coalesce(rn.loc_date, getdate()) next_date
from rl rc
left join rl rn on rc.res_id = rn.res_id and rc.rn + 1 = rn.rn)
select loc_zone, avg(datediff(second, loc_date, next_date))/3600 avg_time
from fdr
group by loc_zone
(由于SQLServer计算时差的方式,计算平均时间(以秒为单位)然后除以60 * 60可能更好。除了getdate()和datediff子句 - 可以用{替换{1}}和sysdate
- 这应该在SQLServer 2005及以后的Oracle 10g中都有效。)
答案 3 :(得分:0)
这应该按照平均花费的分钟数来获得每个区域。 CROSS APPLY
返回不同区域中的下一个ping。
SELECT
loc.LOC_ZONE
,AVG(DATEDIFF(mi,loc.LOC_DATE,nextPing.LOC_DATE)) AS avgMinutes
FROM LOCHIST loc
CROSS APPLY(
SELECT TOP 1 loc2.LOC_DATE
FROM LOCHIST loc2
WHERE loc2.RES_ID = loc.RES_ID
AND loc2.LOC_DATE > loc.LOC_DATE
AND loc2.LOC_ZONE <> loc.LOC_ZONE
ORDER BY loc2.LOC_DATE ASC
) AS nextPing
GROUP BY loc.LOC_ZONE
ORDER BY avgMinutes DESC
答案 4 :(得分:0)
我的解决方案的变体:
select LOC_ZONE, avg(TOTAL_TIME) AVG_TIME from (
select RES_ID, LOC_ZONE, sum(TIME_SPENT) TOTAL_TIME
from (
select RES_ID, LOC_ZONE, datediff(mi, lag(LOC_DATE, 1) over (
partition by RES_ID order by LOC_DATE), LOC_DATE) TIME_SPENT
from LOCHIST
) t
where TIME_SPENT is not null
group by RES_ID, LOC_ZONE) f
group by LOC_ZONE
这说明在同一地点多次停留。 lag
或lead
之间的选择取决于停留应该以ping开始还是结束(即,如果一个预告片从A发送ping,然后x小时后从B发送,那么这是A还是B)。