在两个表之间逐步关联项目

时间:2013-10-14 11:03:33

标签: sql oracle oracle11g

我有两张表。第一张表包含一些激活记录,第二张表包含一些停用记录。

我必须使用以下规则将一个停用与一个激活相关联:

  • 激活必须在停用之前,且两者的间隔必须小于 92 天。
  • 已经与某个停用关联的激活,不能再与其他停用关联。

所以,使用一些数据:

-- a: activations, b: deactivations (sample data for a single client)
create table a (id1 integer, date1 date);
create table b (id2 integer, date2 date);

-- ANSI date literals are used instead of strings such as '1-Feb-2013' so the
-- inserts do not depend on the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
-- Column lists are explicit so the inserts survive added or reordered columns.
insert into a (id1, date1) values (1, date '2013-02-01');
insert into a (id1, date1) values (2, date '2013-02-02');
insert into a (id1, date1) values (3, date '2013-02-03');
insert into a (id1, date1) values (4, date '2013-03-01');
insert into a (id1, date1) values (5, date '2013-03-02');
insert into a (id1, date1) values (6, date '2013-05-01');
insert into a (id1, date1) values (7, date '2013-05-19');

insert into b (id2, date2) values (1, date '2013-05-01');
insert into b (id2, date2) values (2, date '2013-05-01');
insert into b (id2, date2) values (3, date '2013-05-15');
insert into b (id2, date2) values (4, date '2013-05-16');
insert into b (id2, date2) values (5, date '2013-05-17');
insert into b (id2, date2) values (6, date '2013-05-18');

期望的输出:

id1 date1                           id2     date2                       act_ord deact_ord
1   February, 01 2013 00:00:00+0000 1   May, 01 2013 00:00:00+0000  1   1   
2   February, 02 2013 00:00:00+0000 2   May, 01 2013 00:00:00+0000  2   2   
4   March, 01 2013 00:00:00+0000    3   May, 15 2013 00:00:00+0000  4   3   
5   March, 02 2013 00:00:00+0000    4   May, 16 2013 00:00:00+0000  5   4   
6   May, 01 2013 00:00:00+0000      5   May, 17 2013 00:00:00+0000  6   5   

生成候选配对的查询如下:

-- Candidate pairs: every (activation, deactivation) combination in which the
-- activation date lies between 91 days before the deactivation date and the
-- deactivation date itself (both bounds inclusive).
SELECT a.id1,
       a.date1,
       b.id2,
       b.date2
FROM   a
       INNER JOIN b
          ON  b.date2 - 91 <= a.date1
          AND a.date1 <= b.date2;

我用 connect by 写出了一个结果正确的查询,但它太慢了(我有数百万个客户,每个客户有数千条激活和停用记录。上面的示例只针对一个客户)。

-- chrn: all candidate (activation, deactivation) pairs. Each activation and
-- each deactivation is numbered chronologically (ties on date broken by id).
WITH chrn AS (
    SELECT id1,
           date1,
           id2,
           date2,
           DENSE_RANK() OVER (ORDER BY date1, id1) AS act_ord,
           DENSE_RANK() OVER (ORDER BY date2, id2) AS deact_ord
    FROM   a
           INNER JOIN b
              ON  b.date2 - 91 <= a.date1
              AND a.date1 <= b.date2
)
SELECT *
FROM  (
    -- For every depth of the hierarchy keep the pair with the smallest
    -- combined rank (rnk = 1).
    SELECT paths.*,
           ROW_NUMBER() OVER (PARTITION BY lvl ORDER BY act_ord + deact_ord) AS rnk
    FROM  (
        -- Walk the candidates starting from the pair (first activation,
        -- first deactivation). Each step moves to a pair whose activation and
        -- deactivation are both strictly later, and at least one of the two
        -- is the immediate successor of the previous one.
        SELECT cand.*,
               LEVEL AS lvl
        FROM   chrn cand
        START WITH act_ord = 1
               AND deact_ord = 1
        CONNECT BY deact_ord > PRIOR deact_ord
               AND act_ord > PRIOR act_ord
               AND (   deact_ord - 1 = PRIOR deact_ord
                    OR act_ord - 1 = PRIOR act_ord)
    ) paths
)
WHERE rnk = 1;

see sqlfiddle

我想找到一个更快的解决方案,最好只使用分析函数。由于候选配对和路径的数量非常大,递归查询太慢了;也可能是我没能把候选配对和路径的数量降下来。

2 个答案:

答案 0 :(得分:1)

随着记录数量的增加,您的要求无法很好地扩展,因为必须找到所有前面的对才能找到下一对。

当然,如果这件事只需要做一次,那就没有办法绕开这个问题。但如果您需要经常查找新的配对,我强烈建议在表 A 中增加一个 deact_id 列(例如 alter table a add (deact_id integer);),并用下面的触发器在每次插入停用记录时维护它:

-- Row-level trigger on B: when a deactivation is inserted, link it to the
-- earliest still-unlinked activation in A that falls inside the allowed window.
--
-- Prerequisite: table A must have a DEACT_ID column able to hold B.ID2
-- (it is not part of the CREATE TABLE shown with the sample data).
--
-- NOTE(review): the window here uses date1 < :new.date2 (same-day activations
-- are excluded), whereas the candidate query in the question allows
-- date1 = date2 -- confirm which boundary is intended.
create or replace trigger BI_B after insert on B for each row
begin
  -- Single set-based update instead of a cursor loop that exits after the
  -- first row. If no activation qualifies, the subquery returns no row and
  -- nothing is updated.
  update A
  set    deact_id = :new.id2
  where  rowid = (
           select rid
           from  (select rowid as rid
                  from   A
                  where  date1    >= :new.date2 - 91
                    and  date1     < :new.date2
                    and  deact_id is null
                  -- id1 breaks ties between activations on the same date so
                  -- the chosen row is deterministic (same ordering as the
                  -- question's act_ord: date1, id1).
                  order by date1, id1)
           -- Oracle 11g has no FETCH FIRST; ROWNUM over the ordered inline
           -- view picks the first row.
           where  rownum = 1
         );
end;

答案 1 :(得分:0)

试试这个:

-- A: activations, B: deactivations (same sample data as in the question).
CREATE TABLE A ( ID1   INTEGER,
                 DATE1 DATE );

CREATE TABLE B ( ID2   INTEGER,
                 DATE2 DATE );

-- ANSI DATE literals replace strings such as '1-Feb-2013', whose implicit
-- conversion depends on the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
-- Explicit column lists keep the inserts valid if columns are added later.
INSERT INTO A ( ID1, DATE1 ) VALUES ( 1, DATE '2013-02-01' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 2, DATE '2013-02-02' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 3, DATE '2013-02-03' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 4, DATE '2013-03-01' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 5, DATE '2013-03-02' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 6, DATE '2013-05-01' );
INSERT INTO A ( ID1, DATE1 ) VALUES ( 7, DATE '2013-05-19' );

INSERT INTO B ( ID2, DATE2 ) VALUES ( 1, DATE '2013-05-01' );
INSERT INTO B ( ID2, DATE2 ) VALUES ( 2, DATE '2013-05-01' );
INSERT INTO B ( ID2, DATE2 ) VALUES ( 3, DATE '2013-05-15' );
INSERT INTO B ( ID2, DATE2 ) VALUES ( 4, DATE '2013-05-16' );
INSERT INTO B ( ID2, DATE2 ) VALUES ( 5, DATE '2013-05-17' );
INSERT INTO B ( ID2, DATE2 ) VALUES ( 6, DATE '2013-05-18' );

COMMIT;

-- Manually set the stored row-count statistic of table A to 100,000,000
-- (no data is loaded; only the optimizer statistics are changed).
begin
    dbms_stats.set_table_stats(
        ownname => 'REALSPIRITUALS',
        tabname => 'A',
        numrows => 100000000
    );
end;
/

-- Same for table B.
begin
    dbms_stats.set_table_stats(
        ownname => 'REALSPIRITUALS',
        tabname => 'B',
        numrows => 100000000
    );
end;
/

您的查询

-- SQL*Plus: show the execution plan and statistics after the result.
SET AUTOTRACE ON

-- CHRN: candidate (activation, deactivation) pairs, with each side numbered
-- chronologically (ties on date broken by id).
WITH CHRN AS (
    SELECT ID1,
           DATE1,
           ID2,
           DATE2,
           DENSE_RANK() OVER (ORDER BY DATE1, ID1) AS ACT_ORD,
           DENSE_RANK() OVER (ORDER BY DATE2, ID2) AS DEACT_ORD
    FROM   A
           INNER JOIN B
              ON  B.DATE2 - 91 <= A.DATE1
              AND A.DATE1 <= B.DATE2
)
SELECT *
FROM  (
    -- Per hierarchy depth, keep the pair with the lowest combined rank.
    SELECT S.*,
           ROW_NUMBER() OVER (PARTITION BY LVL ORDER BY ACT_ORD + DEACT_ORD) AS RNK
    FROM  (
        -- Start at (first activation, first deactivation). Each step goes to
        -- a pair that is strictly later on both sides and adjacent on at
        -- least one side.
        SELECT A1.*,
               LEVEL AS LVL
        FROM   CHRN A1
        START WITH ACT_ORD = 1
               AND DEACT_ORD = 1
        CONNECT BY DEACT_ORD > PRIOR DEACT_ORD
               AND ACT_ORD > PRIOR ACT_ORD
               AND (   DEACT_ORD - 1 = PRIOR DEACT_ORD
                    OR ACT_ORD - 1 = PRIOR ACT_ORD)
    ) S
)
WHERE RNK = 1;

CBO 对您的查询给出的结果和执行计划:

       ID1 DATE1            ID2 DATE2        ACT_ORD  DEACT_ORD        LVL      RNK
---------- --------- ---------- --------- ---------- ---------- ----------      ----------
         1 01-FEB-13          1 01-MAY-13          1          1          1         1
         2 02-FEB-13          2 01-MAY-13          2          2          2         1
         4 01-MAR-13          3 15-MAY-13          4          3          3         1
         5 02-MAR-13          4 16-MAY-13          5          4          4         1
         6 01-MAY-13          5 17-MAY-13          6          5          5         1

5 rows selected.

Execution Plan
----------------------------------------------------------
   0       SELECT STATEMENT Optimizer Mode=ALL_ROWS (Cost=16 G Card=25000 G Bytes=2235174G)
   1    0    TEMP TABLE TRANSFORMATION
   2    1      LOAD AS SELECT
   3    2        WINDOW SORT (Cost=7 G Card=25000 G Bytes=1024454G)
   4    3          WINDOW SORT (Cost=7 G Card=25000 G Bytes=1024454G)
   5    4            MERGE JOIN (Cost=2 G Card=25000 G Bytes=1024454G)
   6    5              SORT JOIN (Cost=667123 Card=100 M Bytes=2G)
   7    6                TABLE ACCESS FULL SRINIV.A (Cost=770 Card=100 M Bytes=2G)
   8    5              FILTER
   9    8                SORT JOIN (Cost=667123 Card=100 M Bytes=2G)
  10    9                  TABLE ACCESS FULL SRINIV.B (Cost=770 Card=100 M Bytes=2G)
  11    1      VIEW (Cost=9 G Card=25000 G Bytes=2235174G)
  12   11        WINDOW SORT PUSHED RANK (Cost=9 G Card=25000 G Bytes=1932494G)
  13   12          VIEW (Cost=887 M Card=25000 G Bytes=1932494G)
  14   13            CONNECT BY NO FILTERING WITH START-WITH
  15   14              COUNT
  16   15                VIEW (Cost=887 M Card=25000 G Bytes=1629814G)
  17   16                  TABLE ACCESS FULL SYS.SYS_TEMP_0FD9D6820_3AD00CE0 (Cost=887 M Card=25000 G Bytes=1024454G)


Statistics
----------------------------------------------------------
          2  recursive calls
          0  spare statistic 3
          0  gcs messages sent
          7  db block gets from cache
          0  physical reads direct (lob)
          0  queue position update
          0  queue single row
          0  queue ocp pages
          0  HSC OLTP Compressed Blocks
          0  HSC IDL Compressed Blocks
          5  rows processed

新查询

 SET AUTOTRACE ON

-- CHRN: candidate (activation, deactivation) pairs.
--
-- The pair filter is purely date based: an activation qualifies when it is
-- on or before the deactivation and no more than 91 days earlier. ID1 and ID2
-- are independent keys of two different tables, so they must NOT be equated:
-- a join on ID1 = ID2 would only ever pair activation n with deactivation n
-- and could not return pairs such as (ID1 = 4, ID2 = 3).
--
-- DENSE_RANK (not RANK) is required: the join repeats each activation and
-- each deactivation once per candidate partner, and RANK would leave gaps
-- that break the "PRIOR x = x - 1" adjacency test below.
WITH CHRN
    AS (SELECT
             A.ID1,
             A.DATE1,
             B.ID2,
             B.DATE2,
             DENSE_RANK ( )
                 OVER ( ORDER BY
                           A.DATE1,
                           A.ID1 )
                 AS ACT_ORD,
             DENSE_RANK ( )
                 OVER ( ORDER BY
                           B.DATE2,
                           B.ID2 )
                 AS DEACT_ORD
        FROM
                 A
             INNER JOIN
                 B
             ON A.DATE1 >= B.DATE2
                        - 91
                AND A.DATE1 <= B.DATE2)
SELECT
      *
FROM
      -- Per hierarchy depth, keep the pair with the lowest combined rank.
      (SELECT
            S.*,
            ROW_NUMBER ( )
                OVER ( PARTITION BY LVL
                      ORDER BY
                          ACT_ORD
                          + DEACT_ORD )
                AS RNK
       FROM
            -- Walk the candidates from (first activation, first deactivation);
            -- each step is strictly later on both sides and adjacent on at
            -- least one side.
            (SELECT
                   A1.*,
                   LEVEL LVL
             FROM
                   CHRN A1
             CONNECT BY
                      PRIOR DEACT_ORD < DEACT_ORD
                   AND PRIOR ACT_ORD < ACT_ORD
                   AND ( PRIOR DEACT_ORD = DEACT_ORD
                                      - 1
                       OR PRIOR ACT_ORD = ACT_ORD
                                      - 1 )
             START WITH
                   DEACT_ORD = 1
                   AND ACT_ORD = 1) S)
WHERE
      RNK = 1;

CBO 对新查询给出的结果和执行计划:

       ID1 DATE1            ID2 DATE2        ACT_ORD  DEACT_ORD        LVL      RNK
---------- --------- ---------- --------- ---------- ---------- ----------      ----------
         1 01-FEB-13          1 01-MAY-13          1          1          1         1
         2 02-FEB-13          2 01-MAY-13          2          2          2         1
         4 01-MAR-13          3 15-MAY-13          4          3          3         1
         5 02-MAR-13          4 16-MAY-13          5          4          4         1
         6 01-MAY-13          5 17-MAY-13          6          5          5         1

5 rows selected.



Execution Plan
----------------------------------------------------------
 0       SELECT STATEMENT Optimizer Mode=ALL_ROWS (Cost=538808 Card=5 M Bytes=457 M)
 1    0    TEMP TABLE TRANSFORMATION
 2    1      LOAD AS SELECT
 3    2        WINDOW SORT (Cost=436441 Card=5 M Bytes=209 M)
 4    3          WINDOW SORT (Cost=436441 Card=5 M Bytes=209 M)
 5    4            HASH JOIN (Cost=324556 Card=5 M Bytes=209 M)
 6    5              TABLE ACCESS FULL REALSPIRITUALS.A (Cost=770 Card=100 M Bytes=2G)
 7    5              TABLE ACCESS FULL REALSPIRITUALS.B (Cost=770 Card=100 M Bytes=2G)
 8    1      VIEW (Cost=102367 Card=5 M Bytes=457 M)
 9    8        WINDOW SORT PUSHED RANK (Cost=102367 Card=5 M Bytes=395 M)
10    9          VIEW (Cost=5816 Card=5 M Bytes=395 M)
11   10            CONNECT BY NO FILTERING WITH START-WITH
12   11              COUNT
13   12                VIEW (Cost=5816 Card=5 M Bytes=333 M)
14   13                  TABLE ACCESS FULL SYS.SYS_TEMP_0FD9D6822_3AD00CE0 (Cost=5816 Card=5 M Bytes=209 M)

Statistics
----------------------------------------------------------
        2  recursive calls
        0  spare statistic 3
        0  gcs messages sent
        7  db block gets from cache
        0  physical reads direct (lob)
        0  queue position update
        0  queue single row
        0  queue ocp pages
        0  HSC OLTP Compressed Blocks
        0  HSC IDL Compressed Blocks
5  rows processed