PostgreSQL将多个期间合并为一个

时间:2018-08-21 13:53:51

标签: postgresql timestamp range

我正在尝试合并范围。

-- Attempt at merging intervals by comparing each row with the row right before it.
-- The sample intervals are plain text literals; they are fixed-width ISO strings,
-- so text comparison orders them chronologically.
WITH a (begat, endat) AS (
    VALUES
        ('2017-09-16 07:12:57', '2017-09-16 11:30:22'),
        ('2017-09-18 17:05:21', '2017-09-19 13:18:01'),
        ('2017-09-19 15:34:40', '2017-09-22 13:29:37'),
        ('2017-09-22 12:24:16', '2017-09-22 13:18:29'),
        ('2017-09-28 09:48:54', '2017-09-28 13:39:13'),
        ('2017-09-20 13:52:43', '2017-09-20 14:14:43')
),
b AS (
    -- step is TRUE when the row starts after the end of the immediately preceding
    -- row (ordered by begat) and NULL otherwise. Only that single preceding row is
    -- inspected, not every earlier row.
    SELECT
        a.*,
        CASE
            WHEN lag(endat) OVER by_start < begat THEN true
        END AS step
    FROM a
    WINDOW by_start AS (ORDER BY begat)
),
c AS (
    -- Running count of the non-NULL step flags gives each row its group number.
    SELECT
        b.*,
        count(step) OVER by_start AS grp
    FROM b
    WINDOW by_start AS (ORDER BY begat)
)
SELECT
    min(begat),
    coalesce(max(endat), 'infinity') AS range
FROM c
GROUP BY grp
ORDER BY min(begat);

结果

   1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
   2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
   3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
   4 "2017-09-22 12:24:16";"2017-09-22 13:18:29"
   5 "2017-09-28 09:48:54";"2017-09-28 13:39:13"

第 3、4 行相交(第 3 行的 endat 大于第 4 行的 begat)。如何把所有相交的区间合并成一个大区间?

我需要结果

   1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
   2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
   3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
   4 "2017-09-28 09:48:54";"2017-09-28 13:39:13"

3 个答案:

答案 0 :(得分:1)

嘿,我建议使用以下过程:

1 - 标识某一行是否开启一个新区间:如果它与前面所有行都不重叠,就把 is_new 设为 1(CTE b)

2 - 对 is_new 做累计求和,使彼此重叠的行得到相同的序号 seq。这样就有了一个公共标识符,可以据此对 begat 取 MIN、对 endat 取 MAX(CTE c)

3-对于每个序列,给出begat的最小值和endat的最大值,这样您将获得最终值

-- Merge overlapping (or touching) intervals into maximal intervals.
-- The sample intervals are text literals; being fixed-width ISO strings they
-- compare chronologically, so the comparisons below behave like timestamp
-- comparisons for this data.
WITH a (begat, endat) AS (
    VALUES
        ('2017-09-16 07:12:57', '2017-09-16 11:30:22'),
        ('2017-09-18 17:05:21', '2017-09-19 13:18:01'),
        ('2017-09-19 15:34:40', '2017-09-22 13:29:37'),
        ('2017-09-22 12:24:16', '2017-09-22 13:18:29'),
        ('2017-09-28 09:48:54', '2017-09-28 13:39:13'),
        ('2017-09-20 13:52:43', '2017-09-20 14:14:43')
)
, b AS (
    -- is_new = 1 when the row starts strictly after the largest end seen in ALL
    -- earlier rows (window w stops one row before the current one).
    -- For the first row the window is empty, MAX is NULL, the comparison is NULL
    -- and IS TRUE turns it into false, so is_new = 0.
    SELECT
        begat
      , endat
      , (begat > MAX(endat) OVER w IS TRUE)::INT AS is_new
    FROM a
    WINDOW w AS (ORDER BY begat ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
)
, c AS (
    -- The running sum of is_new gives every row of one merged interval the same seq.
    SELECT
        begat
      , endat
      , SUM(is_new) OVER (ORDER BY begat) AS seq
    FROM b
)
SELECT
    MIN(begat) AS beg_at
  , MAX(endat) AS end_at
FROM c
GROUP BY seq
-- Without ORDER BY the row order of a grouped result is unspecified.
ORDER BY beg_at;

答案 1 :(得分:0)

已编辑

如果需要速度,可以使用 PL/pgSQL 函数:

-- Merges the intervals stored in table a in a single ordered pass.
-- Returns one row of a per merged interval, in ascending begat order; an empty
-- table yields no rows. An interval is merged into the current one unless it
-- starts strictly after the current end.
create or replace function append_ranges_in_a() returns setof a
  language plpgsql
as
$BODY$
declare
  v_current a%rowtype;        -- row being read from a
  v_new a%rowtype;            -- merged interval currently being built
  v_first boolean := true;    -- true until the first row has been read
begin
  for v_current in select begat, endat from a order by begat, endat
  loop
    if v_first then
      -- the first row opens the first merged interval
      v_first := false;
      v_new.begat := v_current.begat;
      v_new.endat := v_current.endat;
    elsif v_new.endat < v_current.begat then
      -- gap found: emit the finished interval and open a new one from this row,
      -- taking its end from this row only (never from the interval just emitted)
      return next v_new;
      v_new.begat := v_current.begat;
      v_new.endat := v_current.endat;
    else
      -- overlapping or touching: extend the current interval if this row ends later
      v_new.endat := greatest(v_current.endat, v_new.endat);
    end if;
  end loop;
  -- emit the last interval only if at least one row was read
  if not v_first then
    return next v_new;
  end if;
  return;
end;
$BODY$;

select * from append_ranges_in_a();

我用〜400000行对其进行了测试:

-- Benchmark fixture: refill table a with one random interval per calendar day.
-- The whole table is emptied on purpose before it is reloaded.
DELETE FROM a;

-- One row per day from 1401-01-01 through 2018-08-21. Each interval starts at a
-- random whole hour (0 to 23) of that day and lasts exactly one day.
-- Both bounds are stored as text.
WITH starts AS (
    SELECT day_start + round(random() * 23.0) * interval '1 hour' AS started_at
    FROM generate_series(
        timestamp '1401-01-01',
        timestamp '2018-08-21',
        interval '1 day'
    ) AS day_start
)
INSERT INTO a (begat, endat)
SELECT
    started_at::text,
    (started_at + interval '1 day')::text
FROM starts;

SELECT count(*) FROM a;

-- Read a ten-row window deep inside the function's output.
SELECT *
FROM append_ranges_in_a()
LIMIT 10 OFFSET 100000;

结果表明,它的速度大约是 O(n^2) 纯 SQL 版本的两倍。

旧的慢速解决方案:

您可以使用递归 WITH 查询(参见 https://www.postgresql.org/docs/current/static/queries-with.html )逐行构造结果。

  1. 我创建表
  2. 按 (begat, endat) 排序后的第一行作为第一个候选区间(以其 endat 作为结束时间),但该行尚未“准备好”(ready = false)
  3. 然后,我查看下一行(步骤),如果不相交,则添加一个准备好的行,
  4. 我还添加了一个未准备好的行,其中包含当前(最近)观察到的范围
  5. 当我没有更多行时,我将计算最后一行
  6. 我保留准备就绪的行和最后一行

这是代码

-- Fixture table a: six sample intervals, both bounds stored as text.
CREATE TABLE a AS
SELECT
    v.begat,
    v.endat
FROM (
    VALUES
        ('2017-09-16 07:12:57', '2017-09-16 11:30:22'),
        ('2017-09-18 17:05:21', '2017-09-19 13:18:01'),
        ('2017-09-19 15:34:40', '2017-09-22 13:29:37'),
        ('2017-09-22 12:24:16', '2017-09-22 13:18:29'),
        ('2017-09-28 09:48:54', '2017-09-28 13:39:13'),
        ('2017-09-20 13:52:43', '2017-09-20 14:14:43')
) AS v (begat, endat);

-- Builds the merged intervals of table a one input row at a time with a recursive CTE.
-- Columns of t:
--   begat, endat : bounds of an interval produced so far
--   ready        : true for an interval that is finished, false for the one still being extended
--   step         : used as the OFFSET of the next row of a to read (a ordered by begat, endat)
WITH RECURSIVE t(begat, endat, ready, step) AS (
    -- Seed: the first row of a in (begat, endat) order, not ready yet, next OFFSET is 1.
    select * from (
      select *,false,1 from a order by begat, endat limit 1) a
  UNION ALL
    -- Recursive step. Inside this term, t holds only the rows produced by the previous iteration.
    SELECT new_rows.*
       -- Of those rows, keep the one with the greatest begat.
       FROM (SELECT * FROM t ORDER BY begat DESC limit 1) t,
           -- Fetch the input row at position step. When no row is left this returns
           -- nothing, the join is empty, and the recursion ends.
           lateral (SELECT * FROM a ORDER BY begat, endat OFFSET step LIMIT 1) a,
           lateral (
              -- If the fetched row starts after the current end, emit the current interval as finished.
              SELECT t.begat, t.endat, true as ready, step WHERE t.endat < a.begat
              -- Always emit the interval in progress: it restarts at a.begat after a gap,
              -- otherwise it keeps t.begat. Its end is the greater of both ends, and step advances by one.
              UNION SELECT CASE WHEN t.endat < a.begat THEN a.begat ELSE t.begat END, greatest(a.endat, t.endat), false, step+1
           ) new_rows
)
-- Keep the finished intervals plus the single row that sorts last by (begat, endat).
select begat, endat
  from (
   -- is_last is true only for the row ranked first by (begat desc, endat desc).
   select begat, endat, ready, row_number() over (order by begat desc, endat desc)=1 is_last
     from t
     order by begat, endat) t
  where ready or is_last;

答案 2 :(得分:0)

我使用范围类型(range types): https://www.postgresql.org/docs/9.3/static/rangetypes.html

    -- Merge overlapping (or touching) timestamp intervals using range types.
    --   tmp : sample intervals plus a closed tsrange for each; a NULL end means open-ended
    --   a   : intervals that are not contained in some other, different interval
    --   b,c : group consecutive overlapping intervals, then collapse each group
    WITH tmp AS (
        SELECT
            begat,
            coalesce(endat, 'infinity') AS endat,
            -- Closed bounds '[]' match the closed-interval comparison used in b.
            -- They also keep a zero-length interval (begat = endat) non-empty;
            -- with the default '[)' bounds it would be the empty range, which
            -- every range contains.
            tsrange(begat, coalesce(endat, 'infinity'), '[]') AS rg
        FROM (
            VALUES
                (timestamp '2017-09-11 17:13:03', timestamp '2017-09-12 12:24:09'),
                ('2017-09-19 15:34:40', '2017-09-20 11:04:45'),
                ('2017-09-20 08:32:00', '2017-09-22 13:28:37'),
                ('2017-09-20 13:52:43', '2017-09-20 14:14:43'),
                ('2017-09-21 12:24:16', '2017-09-21 13:28:29'),
                ('2017-09-22 12:24:16', '2017-09-22 13:28:29'),
                ('2017-09-22 12:34:16', '2017-09-23 13:28:29'),
                ('2017-09-22 12:25:16', '2017-09-24 13:28:29'),
                ('2017-09-28 09:48:54', '2017-09-28 13:39:13'),
                ('2017-09-28 14:22:16', '2017-09-28 15:52:15'),
                ('2017-10-05 12:17:45', '2017-10-06 12:35:38'),
                ('2017-10-06 16:20:44', '2017-10-07 10:11:09'),
                ('2017-10-07 20:38:32', '2017-10-09 14:42:29'),
                ('2017-10-12 18:22:14', '2017-10-12 20:52:45')
        ) AS src (begat, endat)
    ),
    a AS (
        -- Drop every interval that lies inside another, different interval.
        -- Comparing the ranges themselves (instead of begat only) also removes
        -- the shorter of two intervals that share the same start.
        -- Among the survivors a later start always implies a later end, so
        -- comparing with just the previous row in b is sufficient.
        SELECT
            l.begat,
            l.endat,
            l.rg
        FROM tmp AS l
        WHERE NOT EXISTS (
            SELECT 1
            FROM tmp AS r
            WHERE r.rg @> l.rg
              AND r.rg <> l.rg
        )
    ),
    b AS (
        -- step is TRUE when the row starts after the previous row's end, NULL otherwise.
        SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
        FROM   a
    ),
    c AS (
        -- Running count of step flags numbers the merged groups.
        SELECT *, count(step) OVER (ORDER BY begat) AS grp
        FROM   b
    )
    -- endat was already coalesced in tmp, so max(endat) is never NULL here.
    SELECT min(begat), max(endat) AS range
    FROM   c
    GROUP  BY grp
    ORDER  BY min(begat);