时间序列中的事件分组

时间:2016-05-10 17:12:50

标签: sql oracle

我正在尝试在我的测量数据中识别出各个降水事件。每条记录包含一个时间、一个测量值,以及一个用于标记当时是否在下雨的标志:

00:00, 32.4, 0
00:10, 32.4, 0
00:20, 32.6, 1
00:30, 32.7, 1
00:40, 32.9, 1
00:50, 33.2, 1
01:00, 33.2, 0
01:10, 33.2, 0
01:20, 33.2, 0
01:30, 33.5, 1
01:40, 33.6, 1
01:50, 33.6, 0
02:00, 33.6, 0
...

现在我想为降水事件生成事件ID:

00:00, 32.4, 0, NULL
00:10, 32.4, 0, NULL
00:20, 32.6, 1, 1
00:30, 32.7, 1, 1
00:40, 32.9, 1, 1
00:50, 33.2, 1, 1
01:00, 33.2, 0, NULL
01:10, 33.2, 0, NULL
01:20, 33.2, 0, NULL
01:30, 33.5, 1, 2
01:40, 33.6, 1, 2
01:50, 33.6, 0, NULL
02:00, 33.6, 0, NULL
...

这样我就可以按事件ID分组来汇总各个事件。如果有人能提示如何在Oracle中实现,我将非常感谢。

到目前为止,我已经能够计算出上面提到的标志,以及当前行与上一行之间的差值:

-- Per-measurement precipitation delta and a 0/1 rain flag.
-- The delta is the change in the precipitation sum relative to the previous
-- row in time order; the first row has no predecessor, so its delta is NULL
-- and its flag falls through to 0.
SELECT
  measured_at,
  station_id, -- the comma here was missing, which silently aliased station_id AS ps
  ps, -- precipitation sum
  ps - LAG(ps, 1, NULL) OVER (ORDER BY measured_at ASC) AS p, -- precipitation delta
  CASE
    WHEN ps - LAG(ps, 1, NULL) OVER (ORDER BY measured_at ASC) > 0 THEN 1
    ELSE 0
  END AS rainflag -- 1 when the precipitation sum increased, otherwise 0
FROM measurements;

我认为一定有某种方法可以生成所需的事件ID,但我自己想不出来。谢谢你的时间!

使用mt0回答的最终解决方案:

-- Rebuild the sample table: one station ('XYZ'), a running precipitation sum
-- (ps) sampled every 10 minutes, with a gap between 13:50 and 17:00 and one
-- decreasing reading at 13:50.
DROP TABLE events;
CREATE TABLE events AS
          SELECT TO_DATE('2016-05-01 12:00', 'YYYY-MM-DD HH24:MI') AS measured_at, 'XYZ' AS station_id, 32.4 AS ps FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 12:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.6 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 12:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.7 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 12:30', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.9 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 12:40', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 12:50', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:00', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.5 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:30', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.6 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:40', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.6 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 13:50', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.5 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 17:00', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.1 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 17:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.2 FROM DUAL
UNION ALL SELECT TO_DATE('2016-05-01 17:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.2 FROM DUAL;


-- Groups consecutive rainy measurements into events and accumulates the
-- precipitation that fell within each event.
-- Every analytic window is partitioned by station_id so that deltas, rain
-- flags and event keys are never computed across different stations; for
-- single-station data the result is the same as without the partitioning.
WITH

-- Previous measurement of the same station, fetched once and reused below.
lagged AS (
    SELECT
        measured_at,
        station_id,
        ps,
        LAG(measured_at) OVER (PARTITION BY station_id ORDER BY measured_at) AS prev_measured_at,
        LAG(ps) OVER (PARTITION BY station_id ORDER BY measured_at) AS prev_ps
    FROM events
),

-- delta_p: change of the precipitation sum, kept only when the previous
--          reading is exactly 10 minutes older (DATE subtraction yields days,
--          and 10 minutes = 1/144 day); NULL after a gap or on the first row.
-- rain:    1 when the precipitation sum increased versus the previous row,
--          regardless of the time gap; otherwise 0.
flagged AS (
    SELECT
        measured_at,
        station_id,
        ps,
        CASE
          WHEN measured_at - prev_measured_at = (1/144) THEN ps - prev_ps
          ELSE NULL
        END AS delta_p,
        CASE
          WHEN ps - prev_ps > 0 THEN 1
          ELSE 0
        END AS rain
    FROM lagged
),

-- p:     delta_p with negative values (sum decreased) suppressed to NULL.
-- event: for rain rows, (rows so far) - (rain rows so far) = number of dry
--        rows seen so far. That number does not change inside a run of
--        consecutive rain rows, so it serves as a per-station event key.
--        Dry rows get NULL.
eventmarked AS (
    SELECT
        f.*,
        CASE
            WHEN f.delta_p >= 0 THEN f.delta_p
            ELSE NULL
        END AS p,
        CASE rain
            WHEN 1 THEN COUNT(1) OVER (PARTITION BY station_id ORDER BY measured_at)
                      - SUM(rain) OVER (PARTITION BY station_id ORDER BY measured_at)
        END AS event
    FROM flagged f
),

-- e_ps: running precipitation total within each event; zero deltas are
--       turned into NULL so they contribute nothing to the sum.
summarized AS (
    SELECT
        em.*,
        SUM(CASE p WHEN 0 THEN NULL ELSE p END)
            OVER (PARTITION BY station_id, event ORDER BY measured_at) AS e_ps
    FROM eventmarked em
)

SELECT measured_at, station_id, ps, p, e_ps FROM summarized
ORDER BY measured_at, station_id;

2 个答案:

答案 0 :(得分:2)

Oracle 设置(建表与示例数据)

-- Sample data for the answer. Note the column mapping used here:
-- measured_at is a 'HH:MI' string, station_id carries the measured value,
-- and ps carries the 0/1 rain flag.
CREATE TABLE events AS
          SELECT '00:00' AS measured_at, 32.4 AS station_id, 0 AS ps FROM DUAL
UNION ALL SELECT '00:10', 32.4, 0 FROM DUAL
UNION ALL SELECT '00:20', 32.6, 1 FROM DUAL
UNION ALL SELECT '00:30', 32.7, 1 FROM DUAL
UNION ALL SELECT '00:40', 32.9, 1 FROM DUAL
UNION ALL SELECT '00:50', 33.2, 1 FROM DUAL
UNION ALL SELECT '01:00', 33.2, 0 FROM DUAL
UNION ALL SELECT '01:10', 33.2, 0 FROM DUAL
UNION ALL SELECT '01:20', 33.2, 0 FROM DUAL
UNION ALL SELECT '01:30', 33.5, 1 FROM DUAL
UNION ALL SELECT '01:40', 33.6, 1 FROM DUAL
UNION ALL SELECT '01:50', 33.6, 0 FROM DUAL
UNION ALL SELECT '02:00', 33.6, 0 FROM DUAL;

查询1

-- Numbers each run of consecutive rain rows (ps = 1) as 1, 2, 3, ...;
-- rows with ps = 0 get NULL.
WITH keyed AS (
  -- For rain rows: rows seen so far minus rain rows seen so far, i.e. the
  -- number of dry rows up to this point. The value is constant inside one
  -- run of rain rows and grows between runs, so it identifies the run.
  SELECT measured_at,
         station_id,
         ps,
         CASE
           WHEN ps = 1
           THEN COUNT(*) OVER ( ORDER BY measured_at )
                  - SUM( ps ) OVER ( ORDER BY measured_at )
         END AS run_key
  FROM   events
)
SELECT measured_at,
       station_id,
       ps,
       -- DENSE_RANK compresses the run keys to gap-free event numbers.
       CASE
         WHEN run_key IS NOT NULL
         THEN DENSE_RANK() OVER ( ORDER BY run_key )
       END AS rainflag
FROM   keyed
ORDER BY measured_at;

查询2

-- Alternative: mark the first row of every rain run, then take a running
-- count of those markers to obtain the event number.
SELECT measured_at,
       station_id,
       ps,
       -- Only rain rows (ps = 1) receive an event number; the running sum of
       -- start markers up to the current row is that number.
       CASE ps WHEN 1
               THEN SUM( rainflag ) OVER ( ORDER BY measured_at )
               END AS rainflag
FROM   (
  SELECT e.*,
         -- 1 where ps rises above the previous row's ps (a missing previous
         -- row counts as 0), i.e. where a rain run starts; NULL elsewhere.
         CASE WHEN ps > LAG( ps, 1, 0 ) OVER ( ORDER BY measured_at )
              THEN 1
              END AS rainflag
  FROM   events e
)
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY measured_at;

输出

MEASURED_AT STATION_ID         PS   RAINFLAG
----------- ---------- ---------- ----------
00:00             32.4          0            
00:10             32.4          0            
00:20             32.6          1          1 
00:30             32.7          1          1 
00:40             32.9          1          1 
00:50             33.2          1          1 
01:00             33.2          0            
01:10             33.2          0            
01:20             33.2          0            
01:30             33.5          1          2 
01:40             33.6          1          2 
01:50             33.6          0            
02:00             33.6          0            

答案 1 :(得分:1)

仅使用LAG函数的替代解决方案。

在子查询中,列PS2标记“开始下雨”(rain started)事件,即每段降水的第一行。主查询只需对该标志做累计求和,同时忽略不下雨的时间点。

-- ev: ps2 = 1 on the first row of each rain run (ps = 1 while the previous
--     row's ps is 0; a missing previous row counts as 0), otherwise 0.
with ev as (
 select measured_at, station_id, ps,
 case when ps = 1 and lag(ps,1,0) over (order by measured_at) = 0
    then 1 else 0 end ps2
 from events)
-- rf: running count of run starts, shown only on rain rows (NULL otherwise).
select measured_at, station_id, ps, ps2,
 case when ps = 1 then
   sum(ps2) over (order by measured_at) end rf
from ev
-- explicit ordering: without it the row order of the result is not guaranteed
order by measured_at
;

MEASURED_AT STATION_ID         PS        PS2         RF
----------- ---------- ---------- ---------- ----------
00:00             32,4          0          0            
00:10             32,4          0          0            
00:20             32,6          1          1          1 
00:30             32,7          1          0          1 
00:40             32,9          1          0          1 
00:50             33,2          1          0          1 
01:00             33,2          0          0            
01:10             33,2          0          0            
01:20             33,2          0          0            
01:30             33,5          1          1          2 
01:40             33,6          1          0          2 
01:50             33,6          0          0            
02:00             33,6          0          0