我需要向一张表中插入两个字段:第一个是主键(文章编号),第二个是这些文章的尺码。
在源环境中,我有一张表,其中包含主键(文章的 TK)以及由多个尺码串联而成的第二个字段。但是,在目标表中,我必须为每个文章的 TK 按尺码逐行插入(每个尺码一行)。例如:
来源:
ART | SIZE
1 | 28/30
2 | 30/32
3 | Size 10/Size 12/Size 14/Size 16
目标:
ART Size
1 | 28
1 | 30
2 | 30
2 | 32
3 | Size 10
3 | Size 12
3 | Size 14
3 | Size 16
困难在于无法预先知道该字段中包含多少个 '/'。
我做了一个查询
-- Split each row's '/'-separated SIZE value into one output row per token.
-- The two PRIOR conditions keep the hierarchy inside a single source row.
-- Without them CONNECT BY links every row to every other row at each level,
-- so the result contains duplicates and the row count (and run time) explodes
-- as the table grows.
SELECT ART,
REGEXP_SUBSTR(SIZE,'[^/]+',1,LEVEL)
FROM TABLLE
CONNECT BY REGEXP_SUBSTR(SIZE,'[^/]+',1,LEVEL) IS NOT NULL
-- stay on the same article; ART is described as the primary key of the source table
AND PRIOR ART = ART
-- a non-deterministic call keeps Oracle from reporting a false CONNECT BY loop
AND PRIOR SYS_GUID() IS NOT NULL;
该 SELECT 语句可以运行,并在 46 秒内显示结果。但是这张表有 10 万行,INSERT 语句耗时太长,无法完成。
有人可以在这一点上帮助我吗?
谢谢&此致
答案 0 :(得分:1)
正则表达式计算起来非常昂贵。如果需要处理大量的行,我个人会使用存储过程 - 管道表函数:
-- Benchmark fixture: 100000 rows, each carrying the same seven-token
-- '/'-separated list in col2; col1 is the row number produced by LEVEL.
CREATE TABLE Tb_SplitStr (col1, col2) AS
SELECT
    LEVEL,
    'Size 10/Size 12/Size 14/Size 14/Size 15/Size 16/Size 17'
FROM dual
CONNECT BY LEVEL <= 100000
PL/SQL 包:
create or replace package Split_Pkg as
  -- Collection type returned by Str_Split; each element holds one token
  -- and is declared as varchar2(1000).
  type T_StrList is table of varchar2(1000);

  -- Splits p_str on the delimiter p_dlm and pipes the pieces back one row at
  -- a time, so the result can be queried with TABLE(Split_Pkg.Str_Split(...)).
  --   p_str  delimited source string
  --   p_dlm  delimiter that separates the tokens
  function Str_Split(
    p_str in varchar2,
    p_dlm in varchar2
  ) return T_StrList pipelined;
end Split_Pkg;
/
create or replace package body Split_Pkg as
  -- Splits p_str on p_dlm and pipes each token back as one row.
  --
  --   p_str  delimited source string; NULL (empty) input produces no rows
  --   p_dlm  delimiter, may be longer than one character; when it is NULL
  --          the whole of p_str is returned as a single row
  --
  -- An empty token (leading or consecutive delimiters) is piped as a NULL
  -- row; a trailing delimiter does not produce an extra row.
  -- The input is scanned by position instead of repeatedly copying the
  -- remainder into a local buffer, so the input is not limited by the size
  -- of a local variable. Each token must still fit the T_StrList element type.
  function Str_Split(
    p_str in varchar2,
    p_dlm in varchar2
  ) return T_StrList pipelined
  is
    l_str_len constant pls_integer := length(p_str);
    l_dlm_len constant pls_integer := length(p_dlm);
    l_start   pls_integer := 1;  -- first character of the current token
    l_dlm_pos pls_integer;       -- position of the next delimiter, 0 if none
  begin
    -- Nothing to split.
    if p_str is null then
      return;
    end if;

    -- INSTR returns NULL for a NULL delimiter; treat that as "no delimiter".
    if p_dlm is null then
      pipe row (p_str);
      return;
    end if;

    while l_start <= l_str_len
    loop
      l_dlm_pos := instr(p_str, p_dlm, l_start);

      if l_dlm_pos = 0 then
        -- Last token: everything from the current position to the end.
        pipe row (substr(p_str, l_start));
        exit;
      end if;

      -- A zero-length SUBSTR yields NULL, which represents an empty token.
      pipe row (substr(p_str, l_start, l_dlm_pos - l_start));

      -- Skip the whole delimiter, not just its first character.
      l_start := l_dlm_pos + l_dlm_len;
    end loop;

    return;
  end Str_Split;
end Split_Pkg;
/
带有regexp函数的SQL查询:
-- Benchmark: count every token produced by splitting col2 on '/' with
-- regular expressions, by pairing each row with every token position.
WITH token_idx (idx) AS (
    -- One row per token position: 1 .. the largest token count of any row.
    SELECT LEVEL
    FROM (
        SELECT MAX(REGEXP_COUNT(col2, '[^/]+')) AS max_tokens
        FROM tb_splitStr
    ) widest
    CONNECT BY LEVEL <= widest.max_tokens
)
-- COUNT skips the NULLs returned for positions past a row's last token.
SELECT COUNT(REGEXP_SUBSTR(src.col2, '[^/]+', 1, pos.idx)) AS res
FROM tb_splitStr src
CROSS JOIN token_idx pos
结果:
-- SQL with regexp
SQL> with ocrs(ocr) as(
2 select level
3 from ( select max(regexp_count(col2, '[^/]+')) as mx
4 from tb_splitStr) t
5 connect by level <= t.mx
6 )
7 select count(regexp_substr(s.col2, '[^/]+', 1, o.ocr)) as res
8 from tb_splitStr s
9 cross join ocrs o
10 ;
Res
------------------------------
700000
Executed in 4.093 seconds
SQL> /
Res
------------------------------
700000
Executed in 3.812 seconds
--Query with pipelined table function
SQL> select count(*)
2 from Tb_SplitStr s
3 cross join table(split_pkg.Str_Split(s.col2, '/'))
4 ;
COUNT(*)
----------
700000
Executed in 2.469 seconds
SQL> /
COUNT(*)
----------
700000
Executed in 2.406 seconds
答案 1 :(得分:0)
我的这篇博客文章展示了处理此类查询的六种不同技术。
不同之处在于那篇文章处理的是日期,而您需要处理的是字符串。您可以使用 regexp_count(size, '/') + 1 作为迭代终止条件,并在选择列表中使用 regexp_substr(size, '[^/]+', 1, i) 来解决此问题。
答案 2 :(得分:0)
如何使用某些XML?
> set serveroutput on
> drop table test_tab
table TEST_TAB dropped.
> create table test_tab
(
art number,
siz varchar2(100)
)
table TEST_TAB created.
> insert into test_tab values (1, '28/30')
1 rows inserted.
> insert into test_tab values (2, '30/32')
1 rows inserted.
> insert into test_tab values (3, 'Size 10/Size 12/Size 14/Size 14')
1 rows inserted.
> commit
committed.
> drop table test_tab2
table TEST_TAB2 dropped.
> create table test_tab2 as
select * from test_tab where 1=0
table TEST_TAB2 created.
> insert into test_tab2 (art, siz)
select art, extractvalue(x.column_value, 'e')
from test_tab, xmltable ('e' passing xmlparse( content '<e>' || replace(siz, '/', '</e><e>') || '</e>')) x
8 rows inserted.
> commit
committed.
> select * from test_tab2
ART SIZ
---------- ----------------------------------------------------------------------------------------------------
1 28
1 30
2 30
2 32
3 Size 10
3 Size 12
3 Size 14
3 Size 14
8 rows selected
下面是同样的做法,但初始数据为 100,000 行,并显示了各步骤的耗时。插入 400,000 行大约需要 2 分钟:
> set serveroutput on
> set timing on
> drop table test_tab
table TEST_TAB dropped.
Elapsed: 00:00:00.055
> create table test_tab
(
art number,
siz varchar2(100)
)
table TEST_TAB created.
Elapsed: 00:00:00.059
> --insert into test_tab values (1, '28/30');
> --insert into test_tab values (2, '30/32');
> --insert into test_tab values (3, 'Size 10/Size 12/Size 14/Size 14');
> insert into test_tab (art, siz)
select level, 'Size 10/Size 12/Size 14/Size 16'
from dual
connect by level <= 100000
100,000 rows inserted.
Elapsed: 00:00:00.191
> commit
committed.
Elapsed: 00:00:00.079
> drop table test_tab2
table TEST_TAB2 dropped.
Elapsed: 00:00:00.081
> create table test_tab2 as
select * from test_tab where 1=0
table TEST_TAB2 created.
Elapsed: 00:00:00.076
> -- perform inserts. This will result in 400,000 rows inserted
> -- note inserts are done conventionally (timing is acceptable)
> insert into test_tab2 (art, siz)
select art, extractvalue(x.column_value, 'e')
from test_tab, xmltable ('e' passing xmlparse( content '<e>' || replace(siz, '/', '</e><e>') || '</e>')) x
400,000 rows inserted.
Elapsed: 00:02:17.046
> commit
committed.
Elapsed: 00:00:00.094
> -- show some data in target table
> select * from test_tab2
where art = 1
ART SIZ
---------- ----------------------------------------------------------------------------------------------------
1 Size 10
1 Size 12
1 Size 14
1 Size 16
Elapsed: 00:00:00.103