使用 CONNECT BY PRIOR 处理数据序列

时间:2018-01-01 05:07:06

标签: sql oracle oracle11g hierarchical-query

我有一张表,按时间段记录了某个系统的到达人数和退出人数,其中也包含对未来各期到达和退出人数的预测。我想为这些尚未填写的预测期间计算期初人数和期末人数(head count),最好使用 MERGE 和 CONNECT BY 来实现。

要重现的数据:

-- One row per period: opening/closing head count plus the movements in between.
CREATE TABLE head_count (
    period_start     DATE,    -- start of the period (the sample rows use the first day of each month)
    head_count_start NUMBER,  -- head count at the start of the period
    head_count_end   NUMBER,  -- head count at the end of the period
    arrival          NUMBER,  -- arrivals during the period
    exits            NUMBER   -- exits during the period
);

-- Sample data, newest period first.
-- Sep-2017 .. Nov-2017 carry stored head counts; from Dec-2017 onwards the
-- HEAD_COUNT_START / HEAD_COUNT_END values are 0 placeholders and only
-- ARRIVAL / EXITS are filled in.
-- ANSI DATE literals and an explicit column list are used so the script does not
-- depend on the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE or on column order.
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-12-01', 0, 0, 13, 275);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-11-01', 0, 0, 0, 46);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-10-01', 0, 0, 6, 61);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-09-01', 0, 0, 275, 1292);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-08-01', 0, 0, 46, 1790);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-07-01', 0, 0, 61, 17);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-06-01', 0, 0, 1292, 3);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-05-01', 0, 0, 1790, 15);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-04-01', 0, 0, 17, 158);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-03-01', 0, 0, 3, 9);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-02-01', 0, 0, 15, 0);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2018-01-01', 0, 0, 158, 4);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2017-12-01', 0, 0, 9, 179);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2017-11-01', 250, 186, 0, 64);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2017-10-01', 276, 250, 4, 30);
INSERT INTO head_count (period_start, head_count_start, head_count_end, arrival, exits) VALUES (DATE '2017-09-01', 638, 276, 179, 541);
  • 期末人数 = 期初人数 + 到达人数 - 退出人数。
  • 期初人数 = 上一期间的期末人数。

我最初的尝试:

-- Asker's first attempt, kept exactly as posted; the question states that it
-- does not produce the correct result.
merge INTO head_count h USING
-- Source: a hierarchical walk over HEAD_COUNT that only returns the stored
-- column values; nothing is accumulated from one level to the next.
(SELECT period_start,
    head_count_start,
    head_count_end,
    arrival,
    exits
  FROM head_count
    -- A child row is the one whose period_start is one month after its parent's.
    CONNECT BY prior period_start = add_months(period_start, -1)
    -- The walk begins at the first day of the current month.
    START WITH period_start       = TRUNC(sysdate, 'Month')
)
-- Each target row is matched to the source row of the SAME period.
src ON (h.period_start = src.period_start)
WHEN matched THEN
  UPDATE
  -- Because of the same-period match, head_count_start receives that period's own
  -- stored head_count_end rather than the previous period's.
  -- NOTE(review): head_count_end is presumably computed from the pre-update value of
  -- h.head_count_start (standard UPDATE semantics) — confirm.
  SET h.head_count_start = src.head_count_end,
    h.head_count_end     = h.head_count_start + h.arrival - h.exits

但是,它没有产生正确的结果。感谢任何帮助!

2 个答案:

答案 0 :(得分:2)

可以尝试用 Oracle 的递归子查询分解子句(recursive subquery factoring clause,即递归 WITH)来代替 CONNECT BY。这种写法需要多敲一些代码,但根据我的经验,它的语法更容易理解。

例如:

-- Recompute HEAD_COUNT_START / HEAD_COUNT_END for every period by rolling the
-- balance forward one month at a time with a recursive WITH clause.
MERGE INTO head_count tgt
USING (
    WITH rolled (period_start, head_count_start, head_count_end, arrival, exits) AS (
        -- Anchor: the earliest period keeps its stored opening count;
        -- its closing count is opening + arrivals - exits.
        SELECT
            hc.period_start,
            hc.head_count_start,
            hc.head_count_start + hc.arrival - hc.exits,
            hc.arrival,
            hc.exits
        FROM head_count hc
        WHERE hc.period_start = (SELECT MIN(period_start) FROM head_count)
        UNION ALL
        -- Step: the following month opens with the previous month's closing
        -- count and closes with that value + arrivals - exits.
        SELECT
            nxt.period_start,
            prev.head_count_end,
            prev.head_count_end + nxt.arrival - nxt.exits,
            nxt.arrival,
            nxt.exits
        FROM rolled prev
        JOIN head_count nxt
            ON nxt.period_start = ADD_MONTHS(prev.period_start, 1)
    )
    -- Only the join key and the two recomputed counts are needed by the MERGE.
    SELECT
        period_start,
        head_count_start,
        head_count_end
    FROM rolled
) src
ON (tgt.period_start = src.period_start)
WHEN MATCHED THEN UPDATE SET
    tgt.head_count_start = src.head_count_start,
    tgt.head_count_end   = src.head_count_end;

这是一个包含所有数据和结果的完整SQL Fiddle

答案 1 :(得分:2)

递归可以用分析函数代替,这应该更快一点:

演示:http://sqlfiddle.com/#!4/87500c/2

-- Running head count per period, computed with analytic functions instead of recursion.
SELECT
    hc.*,
    -- Opening count: the earliest period's stored opening count plus the net
    -- movement (arrivals - exits) of all earlier periods; COALESCE covers the
    -- first row, which has no earlier periods to sum.
    FIRST_VALUE(hc.head_count_start) OVER (ORDER BY hc.period_start)
        + COALESCE(
              SUM(hc.arrival - hc.exits) OVER (
                  ORDER BY hc.period_start
                  ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
              ),
              0
          ) AS starting_headcount,
    -- Closing count: the same base plus the net movement up to and including
    -- the current period.
    FIRST_VALUE(hc.head_count_start) OVER (ORDER BY hc.period_start)
        + SUM(hc.arrival - hc.exits) OVER (ORDER BY hc.period_start) AS ending_headcount
FROM head_count hc
ORDER BY hc.period_start;
|         PERIOD_START | HEAD_COUNT_START | HEAD_COUNT_END | ARRIVAL | EXITS | STARTING_HEADCOUNT | ENDING_HEADCOUNT |
|----------------------|------------------|----------------|---------|-------|--------------------|------------------|
| 2017-09-01T00:00:00Z |              638 |            276 |     179 |   541 |                638 |              276 |
| 2017-10-01T00:00:00Z |              276 |            250 |       4 |    30 |                276 |              250 |
| 2017-11-01T00:00:00Z |              250 |            186 |       0 |    64 |                250 |              186 |
| 2017-12-01T00:00:00Z |                0 |              0 |       9 |   179 |                186 |               16 |
| 2018-01-01T00:00:00Z |                0 |              0 |     158 |     4 |                 16 |              170 |
| 2018-02-01T00:00:00Z |                0 |              0 |      15 |     0 |                170 |              185 |
| 2018-03-01T00:00:00Z |                0 |              0 |       3 |     9 |                185 |              179 |
| 2018-04-01T00:00:00Z |                0 |              0 |      17 |   158 |                179 |               38 |
| 2018-05-01T00:00:00Z |                0 |              0 |    1790 |    15 |                 38 |             1813 |
| 2018-06-01T00:00:00Z |                0 |              0 |    1292 |     3 |               1813 |             3102 |
| 2018-07-01T00:00:00Z |                0 |              0 |      61 |    17 |               3102 |             3146 |
| 2018-08-01T00:00:00Z |                0 |              0 |      46 |  1790 |               3146 |             1402 |
| 2018-09-01T00:00:00Z |                0 |              0 |     275 |  1292 |               1402 |              385 |
| 2018-10-01T00:00:00Z |                0 |              0 |       6 |    61 |                385 |              330 |
| 2018-11-01T00:00:00Z |                0 |              0 |       0 |    46 |                330 |              284 |
| 2018-12-01T00:00:00Z |                0 |              0 |      13 |   275 |                284 |               22 |

然后上面的查询可以用作MERGE语句中的子查询:

-- Write the analytic running totals back into HEAD_COUNT.
MERGE INTO head_count h
USING (
    -- Only the join key and the two computed counts are projected; the other
    -- HEAD_COUNT columns are not used by the MERGE.
    SELECT
        t.period_start,
        -- Opening count: earliest stored opening count + net movement of all
        -- earlier periods (COALESCE covers the first row, which has none).
        FIRST_VALUE(t.head_count_start) OVER (ORDER BY t.period_start)
            + COALESCE(
                  SUM(t.arrival - t.exits) OVER (
                      ORDER BY t.period_start
                      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                  ),
                  0
              ) AS starting_headcount,
        -- Closing count: same base + net movement through the current period.
        FIRST_VALUE(t.head_count_start) OVER (ORDER BY t.period_start)
            + SUM(t.arrival - t.exits) OVER (ORDER BY t.period_start) AS ending_headcount
    FROM head_count t
) calc
ON (calc.period_start = h.period_start)
WHEN MATCHED THEN UPDATE
    SET h.head_count_start = calc.starting_headcount,
        h.head_count_end   = calc.ending_headcount;

演示:http://sqlfiddle.com/#!4/34268/1

|         PERIOD_START | HEAD_COUNT_START | HEAD_COUNT_END | ARRIVAL | EXITS |
|----------------------|------------------|----------------|---------|-------|
| 2017-09-01T00:00:00Z |              638 |            276 |     179 |   541 |
| 2017-10-01T00:00:00Z |              276 |            250 |       4 |    30 |
| 2017-11-01T00:00:00Z |              250 |            186 |       0 |    64 |
| 2017-12-01T00:00:00Z |              186 |             16 |       9 |   179 |
| 2018-01-01T00:00:00Z |               16 |            170 |     158 |     4 |
| 2018-02-01T00:00:00Z |              170 |            185 |      15 |     0 |
| 2018-03-01T00:00:00Z |              185 |            179 |       3 |     9 |
| 2018-04-01T00:00:00Z |              179 |             38 |      17 |   158 |
| 2018-05-01T00:00:00Z |               38 |           1813 |    1790 |    15 |
| 2018-06-01T00:00:00Z |             1813 |           3102 |    1292 |     3 |
| 2018-07-01T00:00:00Z |             3102 |           3146 |      61 |    17 |
| 2018-08-01T00:00:00Z |             3146 |           1402 |      46 |  1790 |
| 2018-09-01T00:00:00Z |             1402 |            385 |     275 |  1292 |
| 2018-10-01T00:00:00Z |              385 |            330 |       6 |    61 |
| 2018-11-01T00:00:00Z |              330 |            284 |       0 |    46 |
| 2018-12-01T00:00:00Z |              284 |             22 |      13 |   275 |