找到一组连续活动的最小值和最大值

时间:2019-03-19 03:37:17

标签: sql oracle hive

嗨,我有一个看起来像这样的数据集:

order   id  seq act_seq     time                    station
100     2   1   9       2015-09-02 10:18:15.000000      7
100     2   1   10      2015-09-02 10:18:22.000000      7
100     2   1   11      2015-09-02 10:26:48.000000      7
100     2   1   12      2015-09-02 10:35:20.000000      7
100     2   1   13      2015-09-02 17:32:33.000000      7
100     2   1   14      2015-09-02 17:32:39.000000      7
100     2   1   15      2015-09-02 17:32:39.000000      7
100     2   1   16      2015-09-30 04:00:05.000000      7
100     2   1   17      2015-09-30 04:00:05.000000      7
100     2   1   18      2015-09-30 04:00:13.000000      8
100     2   1   19      2015-09-30 04:00:56.000000      8
100     2   1   20      2015-09-30 07:24:00.000000      7
100     2   1   21      2015-09-30 07:24:03.000000      7
100     2   1   22      2015-09-30 07:24:06.000000      7
100     2   1   23      2015-09-30 07:24:03.000000      9
and so on..............

我有一个主键为order,id,seq的订单。对于每个订单,id,seq,每个“站”都有一堆用“ act_seq”标记的活动。

我想找到订单在转移到另一个站点之前、在每个站点上连续停留期间的最小时间和最大时间。例如,站点7的输出应为:

order   id  seq station     min_time                    max_time
100     2   1   7           2015-09-02 10:18:15.000000  2015-09-30 04:00:05.000000
100     2   1   7           2015-09-30 07:24:00.000000  2015-09-30 07:24:06.000000

即订单先从站点7转移到站点8,之后又从站点8回到站点7,因此站点7会有2条记录,每条记录各自带有该段连续停留的最小和最大时间值。

我该怎么做?

3 个答案:

答案 0 :(得分:0)

这可以做到。还要避免使用作为Oracle保留关键字的列名,例如ORDER

Fiddle Demo

首先在子查询(T2)中创建一个临时列BRK:利用lag取得上一行的站点,如果站点发生了变化就记为1,表示连续序列在此中断,否则记为0。然后对这个新列求累计和,得到CUM_SUM列,最后按CUM_SUM进行group by。

-- Gaps-and-islands: return one row per unbroken run of activities at the same
-- station, with the smallest and largest TIME found inside that run.
--
-- Steps (innermost first):
--   T1: attach the station of the previous activity (by ACT_SEQ) to each row.
--   T2: BRK = 1 when the station differs from the previous one, else 0.
--   T3: running total of BRK = run number (CUM_SUM) within the order.
--
-- Every window is partitioned by the order key ("ORDER", ID, SEQ) so that a
-- run can never span two different orders.
SELECT "ORDER",
  ID,
  SEQ,
  STATION,
  MIN(TIME) AS MIN_TIME,
  MAX(TIME) AS MAX_TIME
FROM
  (SELECT T2.*,
    -- ROWS frame: count break flags row by row even if ACT_SEQ values tie.
    SUM(BRK) OVER (PARTITION BY "ORDER", ID, SEQ
                   ORDER BY ACT_SEQ
                   ROWS UNBOUNDED PRECEDING) AS CUM_SUM
  FROM
    (SELECT T1.*,
      -- For the first row of an order PREV_STATION is NULL, the comparison is
      -- UNKNOWN and the ELSE branch yields 0, so the first run is number 0.
      CASE
        WHEN STATION <> PREV_STATION
        THEN 1
        ELSE 0
      END AS BRK
    FROM
      ( SELECT T.*,
          LAG(STATION,1) OVER (PARTITION BY "ORDER", ID, SEQ
                               ORDER BY ACT_SEQ) AS PREV_STATION
        FROM TABLE1 T
      ) T1
    ) T2
  ) T3 -- alias is optional in Oracle; some engines require one on derived tables
GROUP BY "ORDER",
  ID,
  SEQ,
  STATION,
  CUM_SUM
ORDER BY "ORDER",
  ID,
  SEQ,
  STATION,
  CUM_SUM;

答案 1 :(得分:0)

这种写法也可以得到预期的输出。您需要判断每一行的站点是否与前一行相同,并用累计sum给连续相同站点的行分配同一个组号,然后在每个组内求出最小和最大时间。排序时使用Act_sq作为排序字段。

-- Gaps-and-islands over inline sample data: for station 7, return one row per
-- unbroken run of activities with the smallest and largest timen of that run.
--
-- timen is kept as a fixed-width 'YYYY-MM-DD HH24:MI:SS.FF6' string; strings
-- in that layout compare in chronological order, so MIN/MAX work on them.
with cte as
(
select 100 as Ordern, 2 as ID, 1 as Seq, 9  as Act_sq, '2015-09-02 10:18:15.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 10 as Act_sq, '2015-09-02 10:18:22.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 11 as Act_sq, '2015-09-02 10:26:48.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 12 as Act_sq, '2015-09-02 10:35:20.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 13 as Act_sq, '2015-09-02 17:32:33.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 14 as Act_sq, '2015-09-02 17:32:39.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 15 as Act_sq, '2015-09-02 17:32:39.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 16 as Act_sq, '2015-09-30 04:00:05.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 17 as Act_sq, '2015-09-30 04:00:05.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 18 as Act_sq, '2015-09-30 04:00:13.000000' as timen, 8 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 19 as Act_sq, '2015-09-30 04:00:56.000000' as timen, 8 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 20 as Act_sq, '2015-09-30 07:24:00.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 21 as Act_sq, '2015-09-30 07:24:03.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 22 as Act_sq, '2015-09-30 07:24:06.000000' as timen, 7 as Station from dual  union all
select 100 as Ordern, 2 as ID, 1 as Seq, 23 as Act_sq, '2015-09-30 07:24:03.000000' as timen, 9 as Station from dual
),
-- samestation = 0 when the row continues the previous row's station, 1 when it
-- starts a new run. The first row of an order has no previous row: LAG gives
-- NULL, the comparison is UNKNOWN and the ELSE branch marks it as a new run.
flagged as
(
select Ordern, id, Seq, Station, timen, act_sq,
       case
         when lag(station) over (partition by ordern, id, seq order by act_sq) = Station
         then 0
         else 1
       end as samestation
from cte
),
-- samevalue = running total of the run-start flags = run number in the order.
runs as
(
select Ordern, id, Seq, Station, timen,
       sum(samestation) over (partition by ordern, id, seq
                              order by act_sq
                              rows unbounded preceding) as samevalue
from flagged
)
-- One pass over the numbered rows is enough; no self-join is needed to
-- aggregate each run.
select Ordern,
       id,
       Seq,
       Station,
       min(timen) as mintime,
       max(timen) as maxtime
from runs
where Station = 7
group by Ordern, id, Seq, Station, samevalue
order by Ordern, id, Seq, mintime;

输出:

ORDERN  ID  SEQ STATION MINTIME                       MAXTIME
100     2   1     7     2015-09-02 10:18:15.000000  2015-09-30 04:00:05.000000
100     2   1      7    2015-09-30 07:24:00.000000  2015-09-30 07:24:06.000000

答案 2 :(得分:0)

-- Oracle 12c+: Pattern matching
-- Returns one row per unbroken run of activities at the same station, with the
-- smallest and largest dtime of that run. The aliases dt_first / dt_last are
-- kept so the result header stays the same.
with s (orderid, id, seq, act_seq, dtime, station) as (
select 100, 2, 1, 9 , to_date('2015-09-02 10:18:15', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 10, to_date('2015-09-02 10:18:22', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 11, to_date('2015-09-02 10:26:48', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 12, to_date('2015-09-02 10:35:20', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 13, to_date('2015-09-02 17:32:33', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 14, to_date('2015-09-02 17:32:39', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 15, to_date('2015-09-02 17:32:39', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 16, to_date('2015-09-30 04:00:05', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 17, to_date('2015-09-30 04:00:05', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 18, to_date('2015-09-30 04:00:13', 'yyyy-mm-dd hh24:mi:ss'), 8 from dual union all
select 100, 2, 1, 19, to_date('2015-09-30 04:00:56', 'yyyy-mm-dd hh24:mi:ss'), 8 from dual union all
select 100, 2, 1, 20, to_date('2015-09-30 07:24:00', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 21, to_date('2015-09-30 07:24:03', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 22, to_date('2015-09-30 07:24:06', 'yyyy-mm-dd hh24:mi:ss'), 7 from dual union all
select 100, 2, 1, 23, to_date('2015-09-30 07:24:03', 'yyyy-mm-dd hh24:mi:ss'), 9 from dual)
select r.orderid,
       r.id,
       r.seq,
       r.station,
       r.dt_first,
       r.dt_last
from s
match_recognize (
partition by orderid, id, seq
order by act_seq, dtime
measures
  station as station,
  -- MIN/MAX rather than FIRST/LAST: the requirement is the smallest and
  -- largest time of the run, which need not sit on its first and last rows.
  min(dtime) as dt_first,
  max(dtime) as dt_last
-- A match is one or more consecutive rows whose station equals the station of
-- the row that opened the match.
pattern (v+)
define v as station = first(station)
) r
-- dt_first separates several runs of the same station, making the row order
-- deterministic.
order by r.orderid, r.id, r.seq, r.station, r.dt_first;

   ORDERID         ID        SEQ    STATION DT_FIRST            DT_LAST
---------- ---------- ---------- ---------- ------------------- -------------------
       100          2          1          7 2015-09-02 10:18:15 2015-09-30 04:00:05
       100          2          1          7 2015-09-30 07:24:00 2015-09-30 07:24:06
       100          2          1          8 2015-09-30 04:00:13 2015-09-30 04:00:56
       100          2          1          9 2015-09-30 07:24:03 2015-09-30 07:24:03