Question

输入数据

-- One row per candidate plan: the period it covers and the profit it yields.
create table schedule_profile
(
    start_date date,
    end_date date,
    profit number(38,0)
);

-- ANSI date literals are used instead of strings such as '5-Jan-2018' so the
-- inserts do not depend on the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
-- The column list keeps the statements valid if columns are added or reordered.
insert into schedule_profile (start_date, end_date, profit)
  values (date '2018-01-05', date '2018-05-01', 100);
insert into schedule_profile (start_date, end_date, profit)
  values (date '2018-01-06', date '2018-02-10', 50);
insert into schedule_profile (start_date, end_date, profit)
  values (date '2018-02-11', date '2018-02-28', 150);
insert into schedule_profile (start_date, end_date, profit)
  values (date '2018-05-02', date '2018-05-30', 200);
insert into schedule_profile (start_date, end_date, profit)
  values (date '2018-01-06', date '2018-01-30', 25);

输出

    '6-Jan-2018','10-Feb-2018',50
    '11-Feb-2018','28-Feb-2018',150
    '2-May-2018','30-May-2018',200

问题：

用户可以选择什么时间表以获取最大的利润？

注意：如果用户选择一个计划，则他们将不能选择一个重叠的计划。

将每行值视为一个计划。例如，如果用户选择了第一个时间表，则他将从2018年1月5日工作到2018年5月1日，并获得100的利润。但是，如果用户按照上面的输出选择时间表，他将获得400的利润（50 + 150 + 200）。

虽然我尝试使用分析窗口功能和自连接，但无法解决问题。有什么办法可以在SQL中解决此问题？

Answer 1

这是解决此问题的一种方法：

使用match_recognize，您可以找到不重叠的日期。为此：

按开始日期，结束日期对输入进行排序
如果start_date在上一行（不重叠）的end_date之前，则该行与上一行重叠。因此，您可以通过定义模式变量non_overlap as ( start_date >= non_overlap.end_date or start_date >= first ( end_date ) )
查找所有不重叠的行，并使用模式init ( non_overlap | {-overlap-} )*跳过重叠的行（overlap两侧的{-和-}是排除运算符，它会把匹配到的重叠行从输出中排除）
您需要对输入数据集中的每一行重复此操作。因此，您需要after match skip to next row
通过返回measures子句中的nvl ( final sum ( non_overlap.profit ), 0 ) + init.profit获得每个组的总利润

哪个给出类似的内容：

-- For each plan, taken in (start_date, end_date) order, open a chain with that
-- plan as INIT and scan the later rows: a row that does not collide with the
-- most recently accepted plan joins the chain as NON_OVERLAP, any other row is
-- matched by the undefined (always true) OVERLAP variable and hidden by {- -}.
-- TOTAL_PROFIT is the profit of the whole chain, repeated on each of its rows.
--
-- NOTE(review): each chain is built greedily (the first compatible row is
-- always taken), so the best chain found is not guaranteed to be the global
-- optimum on arbitrary data - confirm this is acceptable for the real data set.
select start_date,
       end_date,
       cls,
       grp,
       total_profit,
       profit
from   schedule_profile
  match_recognize (
    order by start_date, end_date
    measures
      classifier() as cls,
      match_number() as grp,
      -- SUM over zero NON_OVERLAP rows is NULL, hence the NVL
      nvl ( final sum ( non_overlap.profit ), 0 )
        + init.profit as total_profit
    all rows per match
    -- restart from the very next row so every plan gets to be INIT once
    after match skip to next row
    pattern ( init ( non_overlap | {-overlap-} )* )
    define
      -- Inside its own definition NON_OVERLAP.end_date refers to the row being
      -- tested, so the previously accepted row has to be addressed with
      -- LAST ( ..., 1 ). When no NON_OVERLAP row exists yet that is NULL and
      -- the comparison falls back to the INIT row.
      -- ">=" is kept from the original: a plan may start on the day the
      -- previous plan ends.
      non_overlap as (
        start_date >= nvl ( last ( non_overlap.end_date, 1 ), init.end_date )
      )
);

START_DATE     END_DATE       CLS            GRP    TOTAL_PROFIT    PROFIT   
05-JAN-2018    01-MAY-2018    INIT                1             300       100 
02-MAY-2018    30-MAY-2018    NON_OVERLAP         1             300       200 

06-JAN-2018    30-JAN-2018    INIT                2             375        25 
11-FEB-2018    28-FEB-2018    NON_OVERLAP         2             375       150 
02-MAY-2018    30-MAY-2018    NON_OVERLAP         2             375       200 

06-JAN-2018    10-FEB-2018    INIT                3             400        50 
11-FEB-2018    28-FEB-2018    NON_OVERLAP         3             400       150 
02-MAY-2018    30-MAY-2018    NON_OVERLAP         3             400       200 

11-FEB-2018    28-FEB-2018    INIT                4             350       150 
02-MAY-2018    30-MAY-2018    NON_OVERLAP         4             350       200 

02-MAY-2018    30-MAY-2018    INIT                5             200       200

要得到最终结果，您只需找到最大的total_profit。给max加上over ()即可把它变成分析函数；把查询放进子查询中，然后返回总利润等于该最大值的行：

-- PROFITS: every greedy chain of non-overlapping plans (one chain per starting
-- plan) with the chain's TOTAL_PROFIT and, via the analytic MAX, the best total
-- over all chains. The final query keeps only the rows of the best chain(s).
--
-- NOTE(review): chains are built greedily (the first compatible row is always
-- taken), so the maximum reported here is not guaranteed to be the global
-- optimum on arbitrary data - confirm this is acceptable for the real data set.
with profits as (
  select p.*,
         max ( total_profit )
           over () as max_total_profit
  from   schedule_profile
    match_recognize (
      order by start_date, end_date
      measures
        classifier() as cls,
        match_number() as grp,
        -- SUM over zero NON_OVERLAP rows is NULL, hence the NVL
        nvl ( final sum ( non_overlap.profit ), 0 )
          + init.profit as total_profit
      all rows per match
      -- restart from the very next row so every plan gets to be INIT once
      after match skip to next row
      pattern ( init ( non_overlap | {-overlap-} )* )
      define
        -- Inside its own definition NON_OVERLAP.end_date refers to the row
        -- being tested, so the previously accepted row has to be addressed
        -- with LAST ( ..., 1 ). When no NON_OVERLAP row exists yet that is
        -- NULL and the comparison falls back to the INIT row.
        -- ">=" is kept from the original: a plan may start on the day the
        -- previous plan ends.
        non_overlap as (
          start_date >= nvl ( last ( non_overlap.end_date, 1 ), init.end_date )
        )
  ) p
)
select start_date,
       end_date,
       cls,
       grp,
       total_profit,
       profit,
       max_total_profit
from   profits
where  total_profit = max_total_profit;

START_DATE     END_DATE       CLS            GRP    TOTAL_PROFIT    PROFIT    MAX_TOTAL_PROFIT   
06-JAN-2018    10-FEB-2018    INIT                3             400        50                 400 
11-FEB-2018    28-FEB-2018    NON_OVERLAP         3             400       150                 400 
02-MAY-2018    30-MAY-2018    NON_OVERLAP         3             400       200                 400

注意：这种方法可能会多次处理同一行。在存在大量重叠的大型数据集上，这可能非常慢！

Answer 2

使用递归cte

-- Enumerate every chain of plans in which each plan starts strictly after the
-- previous one ends, then return the chain(s) whose summed profit is highest.
with plan_chain (start_date, end_date, total, path) as (
    -- anchor member: every plan opens a chain of its own
    select
        sp.start_date,
        sp.end_date,
        sp.profit as total,
        sp.start_date || '..' || sp.end_date || ';' as path
    from schedule_profile sp
    union all
    -- recursive member: extend a chain with any plan that begins after the
    -- chain's current end_date, accumulating profit and the textual path
    select
        nxt.start_date,
        nxt.end_date,
        nxt.profit + pc.total,
        pc.path || nxt.start_date || '..' || nxt.end_date || ';'
    from plan_chain pc
    inner join schedule_profile nxt
        on pc.end_date < nxt.start_date
),
-- attach the overall best total to every chain so it can be filtered on
scored as (
    select
        pc.path,
        pc.total,
        max(pc.total) over () as best_total
    from plan_chain pc
)
select
    path,
    total
from scored
where total = best_total;

返回

PATH    TOTAL
06-JAN-18..10-FEB-18;11-FEB-18..28-FEB-18;02-MAY-18..30-MAY-18; 400

Fiddle

Answer 3

您可以使用以下查询来获得所需的结果，但就我而言，它是单行的。

-- DATA PREPARATION
-- SQL*Plus transcript: creates SCHEDULE_PROFILE (start_date, end_date, profit)
-- and loads the five sample plans from the question.
-- The dates are passed as character strings, so the inserts rely on implicit
-- string-to-date conversion driven by the session's NLS date settings.
SQL> create table schedule_profile
  2  (
  3      start_date date,
  4      end_date date,
  5      profit number(38,0)
  6  );

Table created.

SQL>
SQL> insert into schedule_profile values ('5-Jan-2018','1-May-2018',100);

1 row created.

SQL> insert into schedule_profile values ('6-Jan-2018','10-Feb-2018',50);

1 row created.

SQL> insert into schedule_profile values ('11-Feb-2018','28-Feb-2018',150);

1 row created.

SQL> insert into schedule_profile values ('2-May-2018','30-May-2018',200);

1 row created.

SQL> insert into schedule_profile values ('6-Jan-2018','30-Jan-2018',25);

1 row created.

-

-- ACTUAL QUERY
-- Inner query: CONNECT BY has no START WITH, so every plan is a root; a child
-- row must start strictly after its parent's END_DATE. SYS_CONNECT_BY_PATH
-- builds two strings per chain: PTH, a readable list of the plans separated
-- by ' | ', and VAL, the profits joined with '+' (e.g. 50+150+200).
-- Outer query: LTRIM strips the leading ' | ' characters from PTH, XMLQUERY
-- evaluates the '+'-joined string and GETNUMBERVAL() returns it as a number.
-- ORDER BY VAL DESC FETCH FIRST ROW ONLY keeps a single row, so when several
-- chains tie for the best total only one of them is returned.
SQL>
SQL> SELECT
  2      LTRIM(PTH, ' | ') AS DATES,
  3      XMLQUERY
  4  ( VAL RETURNING CONTENT ). GETNUMBERVAL() AS VAL
  5  FROM
  6      (
  7          SELECT
  8              SYS_CONNECT_BY_PATH(START_DATE || '-'
  9                                                || END_DATE || '(' || PROFIT || ')', ' | ') AS PTH,
 10              LTRIM(SYS_CONNECT_BY_PATH(PROFIT, '+'), '+') AS VAL
 11          FROM
 12              SCHEDULE_PROFILE
 13          CONNECT BY
 14              START_DATE > PRIOR END_DATE
 15      )
 16  ORDER BY VAL DESC FETCH FIRST ROW ONLY;

DATES
--------------------------------------------------------------------------------
       VAL
----------
06-JAN-18-10-FEB-18(50) | 11-FEB-18-28-FEB-18(150) | 02-MAY-18-30-MAY-18(200)
       400


SQL>

db<>fiddle demo

干杯！

SQL查询以查找最大获利时间表

3 个答案: