消除Oracle Array of Object中的重复记录

时间:2016-06-22 17:24:17

标签: sql oracle plsql

我在 Oracle 中声明了以下对象类型和集合(数组)类型:

-- Object type for one element of the array. The question asks to
-- de-duplicate elements on the combination (parent_key, child_key, dt).
create or replace
TYPE TestObj as
OBJECT (parent_key varchar2(20), child_key varchar2(20),

dt date,other1 varchar2(20),other2 varchar2(20)); 和

-- Nested-table collection type whose elements are TestObj instances.
CREATE OR REPLACE TYPE TestArray
  AS TABLE OF TestObj;

在我的一个存储过程(SP)中,有一个如下类型的输入参数:

IN_TestArray IN TestArray

现在在这个数组中,我得到的数据集包括一些重复项:

Id  Parent_key    Child_Key   dt         other1     other2
===========================================================
1    AT              AA      2016-06-22    test1       test2
2    AT              A0      2016-06-22    test3       test4
3    BT              BA      2016-06-22    test5       test6
4    AT              AA      2016-06-22    test1       test2
5    AT              AA      2016-06-21    test21      test22
6    CT              CA      2016-06-22    test23      test24
7    AT              AA      2016-06-22    test31      test32

这里我希望通过 parent_key,child_key和dt的组合来消除数组中的重复记录。

因此,处理之后的数组应该只包含以下行:

(any one from 1, 4, 7)
2
3
5
6

任何帮助都会受到赞赏.....

2 个答案:

答案 0 :(得分:1)

使用 SET() 集合函数可以删除完全重复的元素。

Oracle安装程序

-- Element type used by the examples: parent_key, child_key and dt are the
-- attributes the examples de-duplicate on; other1/other2 are carried along.
CREATE TYPE TestObj AS OBJECT
( parent_key  VARCHAR2(20)
, child_key   VARCHAR2(20)
, dt          DATE
, other1      VARCHAR2(20)
, other2      VARCHAR2(20)
);
/

-- Nested-table collection of TestObj elements.
CREATE TYPE TestArray
  AS TABLE OF TestObj;
/

**查询**

-- SET() removes exact duplicate elements from the nested table, so the two
-- identical TestObj values below come back as a single row.
SELECT t.parent_key,
       t.child_key,
       t.dt,
       t.other1,
       t.other2
FROM   TABLE(
         SET(
           TestArray(
             TestObj( 'AT', 'AA', DATE '2016-06-22', 'test1', 'test2' ),
             TestObj( 'AT', 'AA', DATE '2016-06-22', 'test1', 'test2' )
           )
         )
       ) t;

**输出**

PARENT_KEY   CHILD_KEY    DT                  OTHER1       OTHER2
------------ ------------ ------------------- ------------ ------------
AT           AA           2016-06-22 00:00:00 test1        test2

如果您只想根据部分列(而不是整行)来去除重复项,则可以使用:

-- Collapse the input to one element per (parent_key, child_key, dt).
-- The inline view picks other1/other2 from the row with the lowest ROWNUM
-- in each group; the outer query rebuilds a TestObj from those columns.
SELECT TestObj( parent_key, child_key, dt, first_other1, first_other2 )
BULK COLLECT INTO array_unique
FROM (
  SELECT parent_key,
         child_key,
         dt,
         MIN( other1 ) KEEP ( DENSE_RANK FIRST ORDER BY ROWNUM ) AS first_other1,
         MIN( other2 ) KEEP ( DENSE_RANK FIRST ORDER BY ROWNUM ) AS first_other2
  FROM   TABLE( array_with_dups )
  GROUP BY parent_key, child_key, dt
);

像这样:

SET SERVEROUTPUT ON;

-- Demo: de-duplicate a TestArray on (parent_key, child_key, dt), keeping the
-- other1/other2 values of the first row seen in each group, then print the
-- surviving elements.
DECLARE
  array_with_dups TestArray
       := TestArray(
            TestObj( 'AT', 'AA', DATE '2016-06-22', 'test1', 'test2' ),
            TestObj( 'AT', 'AA', DATE '2016-06-22', 'test1', 'test2' ),
            TestObj( 'AT', 'AA', DATE '2016-06-22', 'test31', 'test32' )
          );
  array_unique TestArray;
BEGIN
  -- One output element per key group; KEEP (DENSE_RANK FIRST ORDER BY ROWNUM)
  -- takes other1/other2 from the lowest-ROWNUM row of the group.
  SELECT TestObj(
           parent_key,
           child_key,
           dt,
           MIN( other1 ) KEEP ( DENSE_RANK FIRST ORDER BY ROWNUM ),
           MIN( other2 ) KEEP ( DENSE_RANK FIRST ORDER BY ROWNUM )
         )
  BULK COLLECT INTO array_unique
  FROM   TABLE( array_with_dups )
  GROUP BY parent_key, child_key, dt;

  -- BULK COLLECT fills the collection densely from 1, so 1 .. COUNT is safe
  -- and simply does nothing when the result is empty.
  FOR i IN 1 .. array_unique.COUNT LOOP
    DBMS_OUTPUT.PUT_LINE(
      array_unique(i).parent_key
      || ', ' || array_unique(i).child_key
      -- Explicit format mask: concatenating a DATE directly would depend on
      -- the session's NLS_DATE_FORMAT.
      || ', ' || TO_CHAR( array_unique(i).dt, 'YYYY-MM-DD HH24:MI:SS' )
      || ', ' || array_unique(i).other1
      || ', ' || array_unique(i).other2
    );
  END LOOP;
END;
/

**输出**

AT, AA, 2016-06-22 00:00:00, test1, test2

答案 1 :(得分:0)

您可以使用集合的 DELETE 方法从集合中删除元素,但需要先确定要删除的索引值。这种做法有点繁琐,但应该可行:

  -- Rank the elements inside each (parent_key, child_key, dt) group, ordered
  -- by other1, other2, and delete every element ranked after the first.
  -- NOTE(review): ROWNUM is used as the collection index; this presumably
  -- relies on TABLE() returning the elements of a dense collection in index
  -- order -- confirm before using on a sparse collection.
  for dup in (
    select idx
    from (
      select t.*,
             rownum as idx,
             row_number() over (
               partition by parent_key, child_key, dt
               order by other1, other2
             ) as dup_rank
      from table(in_testarray) t
    )
    where dup_rank > 1
  )
  loop
    in_testarray.delete(dup.idx);
  end loop;

演示:

-- Demo: load seven elements (including duplicates on parent_key, child_key,
-- dt), delete the duplicates in place, then print the surviving elements
-- together with their original indexes.
declare
  in_testarray testarray;
  idx pls_integer;  -- current index while walking the sparse collection
begin
  select *
  bulk collect into in_testarray
  from (
    select testobj('AT', 'AA', date '2016-06-22', 'test1', 'test2') from dual
    union all select testobj('AT', 'A0', date '2016-06-22', 'test3', 'test4') from dual
    union all select testobj('BT', 'BA', date '2016-06-22', 'test5', 'test6') from dual
    union all select testobj('AT', 'AA', date '2016-06-22', 'test1', 'test2') from dual
    union all select testobj('AT', 'AA', date '2016-06-21', 'test21', 'test22') from dual
    union all select testobj('CT', 'CA', date '2016-06-22', 'test23', 'test24') from dual
    union all select testobj('AT', 'AA', date '2016-06-22', 'test31', 'test32') from dual
  );

  -- Every element ranked after the first within its key group is a duplicate;
  -- ROWNUM (aliased i) is used as the index of the element to delete.
  for r in (
    select i from (
      select t.*, rownum as i,
        row_number() over (partition by parent_key, child_key, dt
          order by other1, other2) as key_rn
      from table(in_testarray) t
      )
    where key_rn > 1
  )
  loop
    dbms_output.put_line('deleting ' || r.i);
    in_testarray.delete(r.i);
  end loop;

  -- The collection is sparse after DELETE, so walk it with FIRST/NEXT.
  -- This visits only existing elements and does nothing for an empty
  -- collection, where a "first..last" range would have NULL bounds and fail.
  idx := in_testarray.first;
  while idx is not null loop
    -- Explicit format mask: concatenating a DATE directly would depend on
    -- the session's NLS_DATE_FORMAT.
    dbms_output.put_line(idx ||': '|| in_testarray(idx).parent_key ||':'|| in_testarray(idx).child_key
      ||':'|| to_char(in_testarray(idx).dt, 'YYYY-MM-DD') ||':'|| in_testarray(idx).other1 ||':'|| in_testarray(idx).other2);
    idx := in_testarray.next(idx);
  end loop;
end;
/

PL/SQL procedure successfully completed.

该集合最终有五个元素:

PL/SQL procedure successfully completed.

deleting 4
deleting 7
1: AT:AA:2016-06-22:test1:test2
2: AT:A0:2016-06-22:test3:test4
3: BT:BA:2016-06-22:test5:test6
5: AT:AA:2016-06-21:test21:test22
6: CT:CA:2016-06-22:test23:test24

但是直接修改传入的数组可能不是一个好主意(而且 IN 参数本来就不允许被修改),所以你应该修改原始数组的副本。

或者你可以直接创建一个不包含重复项的副本;假设你声明了一个同类型的变量 my_testarray:

  -- Build a de-duplicated copy of in_testarray: rank the elements inside each
  -- (parent_key, child_key, dt) group by other1, other2 and keep only the
  -- first-ranked element of every group.
  select testobj(parent_key, child_key, dt, other1, other2)
  bulk collect into my_testarray
  from (
    select t.parent_key,
           t.child_key,
           t.dt,
           t.other1,
           t.other2,
           row_number() over (
             partition by parent_key, child_key, dt
             order by other1, other2
           ) as dup_rank
    from table(in_testarray) t
  )
  where dup_rank = 1;

然后使用该集合而不是原始集合。

快速演示:

-- Demo: load seven elements (including duplicates on parent_key, child_key,
-- dt), copy one element per key group into a second collection, and print
-- that de-duplicated, densely indexed copy.
declare
 in_testarray testarray;
 my_testarray testarray;
begin
  select *
  bulk collect into in_testarray
  from (
    select testobj('AT', 'AA', date '2016-06-22', 'test1', 'test2') from dual
    union all select testobj('AT', 'A0', date '2016-06-22', 'test3', 'test4') from dual
    union all select testobj('BT', 'BA', date '2016-06-22', 'test5', 'test6') from dual
    union all select testobj('AT', 'AA', date '2016-06-22', 'test1', 'test2') from dual
    union all select testobj('AT', 'AA', date '2016-06-21', 'test21', 'test22') from dual
    union all select testobj('CT', 'CA', date '2016-06-22', 'test23', 'test24') from dual
    union all select testobj('AT', 'AA', date '2016-06-22', 'test31', 'test32') from dual
  );

  -- Keep the first-ranked element of every key group. The ORDER BY makes the
  -- element order of the resulting collection deterministic instead of
  -- depending on how the query happens to be executed.
  select testobj(parent_key, child_key, dt, other1, other2)
  bulk collect into my_testarray
  from (
    select t.*,
      row_number() over (partition by parent_key, child_key, dt
        order by other1, other2) as key_rn
    from table(in_testarray) t
  )
  where key_rn = 1
  order by parent_key, child_key, dt;

  -- BULK COLLECT fills the collection densely from 1, so iterate 1..count;
  -- unlike first..last this is also safe when the collection is empty.
  for i in 1..my_testarray.count loop
    -- Explicit format mask: concatenating a DATE directly would depend on
    -- the session's NLS_DATE_FORMAT.
    dbms_output.put_line(i ||': '|| my_testarray(i).parent_key ||':'|| my_testarray(i).child_key
      ||':'|| to_char(my_testarray(i).dt, 'YYYY-MM-DD') ||':'|| my_testarray(i).other1 ||':'|| my_testarray(i).other2);
  end loop;
end;
/

得到的元素相同,但该集合现在不再是稀疏的(索引是连续的):

PL/SQL procedure successfully completed.

1: AT:A0:2016-06-22:test3:test4
2: AT:AA:2016-06-21:test21:test22
3: AT:AA:2016-06-22:test1:test2
4: BT:BA:2016-06-22:test5:test6
5: CT:CA:2016-06-22:test23:test24

或者您可以在使用原始集合时执行类似的操作,例如:在表集合表达式中。