我有一个字符串,看起来像这样 -
12361_BBMS_GTECHL|12362_BBMS_PRIM|12363_BBMS_SEC|....and so on
所以我需要获取
12361 and BBMS_GTECHL
12362 and BBMS_PRIM
12363 and BBMS_SEC
我用过 -
-- Split a '|'-separated list of code=value pairs into one row per pair.
-- The list is rewritten into an XML document of the form
--   <rowset><row><Code>..</Code><Value>..</Value></row>...</rowset>
-- and XMLTABLE then projects each <row> as a relational (code, descr) row.
-- XMLTABLE replaces the deprecated TABLE(XMLSEQUENCE(EXTRACT(...))) form,
-- which only returned the raw <row> fragments as XMLType values.
-- Caveat: the input must not contain XML special characters (&, <, >),
-- otherwise XMLTYPE() fails to parse the generated document.
select x.code,
       x.descr
from xmltable(
         '/rowset/row'
         passing xmltype(
                     '<rowset><row><Code>'
                     || replace(
                            replace('12361=BBMS_GTECHL|12362=BBMS_PRIM',
                                    '|', '</Value></row><row><Code>'),
                            '=', '</Code><Value>')
                     || '</Value></row></rowset>')
         columns code  varchar2(100) path 'Code',
                 descr varchar2(100) path 'Value'
     ) x;
declare
    -- Sample input: '|'-separated list of code_description tokens.
    l_input constant varchar2(1000) := '12361_BBMS_GTECHL|12362_BBMS_PRIM';
    l_val   varchar2(1000);
begin
    -- INSTR needs both the string to search and the substring to look for;
    -- the original call passed only the delimiter.
    -- This prints everything after the first '|'. When the input has no '|',
    -- INSTR returns 0 and the whole input is printed.
    l_val := substr(l_input, instr(l_input, '|') + 1);
    dbms_output.put_line(l_val);
end;
但是没有得到理想的结果!我需要在一个包(package)中实现这段逻辑;如果能在这里得到一些提示,我就可以动手去写了。
我的数据库版本是 -
Oracle Database 12c Enterprise Edition Release 12.1.0.2.0 - 64bit Production
答案 0 :(得分:2)
这是一个使用递归子查询分解(recursive subquery factoring,即“递归 CTE”)的解决方案。请注意,查询用位置指针记录各个管道符号(|)的位置,以及每个管道符号之后的第一个下划线的位置(其余下划线被忽略)。此外,该解决方案只使用标准的 INSTR 和 SUBSTR,避免了正则表达式(正则表达式执行得稍慢一些——在处理大量数据时,这一点很重要)。
-- Split a '|'-separated list of code_description tokens into (code, descr)
-- rows using a recursive CTE and plain INSTR/SUBSTR.
with input_data (input_str) as (
    select '12361_BBMS_GTECHL|12362_BBMS_PRIM|12363_BBMS_SEC' from dual
),
-- Wrap the list in pipes so every token has a '|' on both sides.
t (str) as (
    select '|' || input_str || '|' from input_data
),
-- Walk the string one token per recursion level.
--   p2_from : position of the pipe that opens the current token
--   p1_to   : position of the first underscore inside the current token
--   p2_to   : position of the pipe that closes the current token
-- The anchor row (lvl = 0) only seeds the pointers; it carries no data.
r (lvl, code, descr, str, p2_from, p1_to, p2_to) as (
    select 0, null, null, str, 1, instr(str, '_', 1, 1), instr(str, '|', 1, 2)
    from t
    union all
    select lvl + 1,
           substr(str, p2_from + 1, p1_to - p2_from - 1),  -- text between opening pipe and first '_'
           substr(str, p1_to + 1, p2_to - p1_to - 1),      -- text between first '_' and closing pipe
           str,
           p2_to,
           instr(str, '_', p2_to + 1, 1),
           instr(str, '|', p2_to + 1, 1)
    from r
    where p1_to != 0  -- no further underscore means no further token
)
select code, descr
from r
where lvl != 0
order by lvl;  -- return the tokens in their original left-to-right order
**输出**:
CODE DESCR
------- --------------------
12361 BBMS_GTECHL
12362 BBMS_PRIM
12363 BBMS_SEC
答案 1 :(得分:0)
如果我是你,而且主要考虑的是性能,我会使用表函数。mathguy 的解决方案可以很好地工作,但如果使用流水线函数(pipelined function),效率会更高。
首先,创建函数所需的类型。
-- Drop earlier versions of the types so the script can be re-run.
-- The collection type is dropped first because it depends on the row type.
-- ORA-04043 (object does not exist) is ignored so a first run does not fail.
begin
    execute immediate 'drop type type_test_table';
exception
    when others then
        if sqlcode != -4043 then
            raise;
        end if;
end;
/
begin
    execute immediate 'drop type type_test_row';
exception
    when others then
        if sqlcode != -4043 then
            raise;
        end if;
end;
/
-- One parsed token: the part before the first underscore (code) and the rest (descr).
create type type_test_row as object (
    code  varchar2(2000),
    descr varchar2(50)
)
/
-- Collection of parsed tokens; return type of the pipelined function.
create type type_test_table is table of type_test_row
/
然后我们创建我们的函数:
create or replace function test_pipe_func return type_test_table pipelined as
    -- Pipelined table function: generates 1,000,000 test strings, splits each
    -- one on '|' and pipes one type_test_row(code, descr) per token, where
    -- code is the text before the token's first underscore and descr is the
    -- text after it.
    --
    -- NOTE(review): each generated string carries four tokens (12361.., 12362..,
    -- 12363.., 12364..), whereas the other two test-data generators in this file
    -- carry three, so the row counts of the benchmarks are not identical.
    cursor c_data_in is
        select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level||'|12364'||level||'_BBU_SEC'||level as str from dual
        connect by level <= 1000000;
    v_element  varchar2(300);   -- current token
    v_code     varchar2(100);
    v_descr    varchar2(200);
    v_start    pls_integer;     -- first character of the current token
    v_pipe_pos pls_integer;     -- position of the pipe closing the token, 0 if last token
    v_sep_pos  pls_integer;     -- position of the first underscore inside the token
begin
    for l_data_in in c_data_in loop
        v_start := 1;
        loop
            -- NVL keeps the loop finite if the source string is NULL.
            v_pipe_pos := nvl(instr(l_data_in.str, '|', v_start), 0);
            if v_pipe_pos = 0 then
                -- Last token: take everything up to the end of the string.
                v_element := substr(l_data_in.str, v_start);
            else
                v_element := substr(l_data_in.str, v_start, v_pipe_pos - v_start);
            end if;
            -- Split on the first underscore only; later underscores stay in descr.
            v_sep_pos := instr(v_element, '_');
            v_code    := substr(v_element, 1, v_sep_pos - 1);
            v_descr   := substr(v_element, v_sep_pos + 1);
            pipe row(type_test_row(v_code, v_descr));
            exit when v_pipe_pos = 0;
            v_start := v_pipe_pos + 1;
        end loop;
    end loop;
    -- A pipelined function ends with a RETURN that carries no value.
    return;
end test_pipe_func;
/
我稍微修改了测试用例,以便在测试中生成任意多的行。我使用流水线函数,是为了在处理大数据集时限制进程的内存占用,并且能够直接在 select 中使用它。如果您的用例不同(我不清楚,也许是要把输入数据插入到表中),另一个选择是使用批量收集(bulk collect)和 forall。
create or replace procedure test_bulk_collect_proc as
    -- Generates 1,000,000 test strings, splits each one on '|' into
    -- (code, descr) pairs and inserts them into test_bbu in batches,
    -- using BULK COLLECT for the reads and FORALL for the writes.
    -- The procedure does not commit; that is left to the caller.
    -- Requires table test_bbu(code, descr) to exist when the procedure is compiled.
    cursor c_data_in is
        select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level as str from dual
        connect by level <= 1000000;
    type type_table_data_in is table of c_data_in%rowtype;
    table_data_in type_table_data_in;
    v_element  varchar2(300);   -- current token
    v_code     varchar2(100);
    v_descr    varchar2(200);
    v_str      varchar2(4000);  -- current source string
    v_start    pls_integer;     -- first character of the current token
    v_pipe_pos pls_integer;     -- position of the pipe closing the token, 0 if last token
    v_sep_pos  pls_integer;     -- position of the first underscore inside the token
    v_t_insr   type_test_table; -- parsed rows of the current batch
    limit_in   constant pls_integer := 100000;  -- source rows fetched per batch
begin
    open c_data_in;
    loop
        fetch c_data_in bulk collect into table_data_in limit limit_in;
        -- The final fetch is empty when the row count is a multiple of the limit.
        exit when table_data_in.count = 0;

        v_t_insr := type_test_table();
        for indx in 1 .. table_data_in.count loop
            v_str   := table_data_in(indx).str;
            v_start := 1;
            loop
                -- NVL keeps the loop finite if the source string is NULL.
                v_pipe_pos := nvl(instr(v_str, '|', v_start), 0);
                if v_pipe_pos = 0 then
                    -- Last token: take everything up to the end of the string.
                    v_element := substr(v_str, v_start);
                else
                    v_element := substr(v_str, v_start, v_pipe_pos - v_start);
                end if;
                -- Split on the first underscore only; later underscores stay in descr.
                v_sep_pos := instr(v_element, '_');
                v_code    := substr(v_element, 1, v_sep_pos - 1);
                v_descr   := substr(v_element, v_sep_pos + 1);
                v_t_insr.extend;
                v_t_insr(v_t_insr.last) := type_test_row(v_code, v_descr);
                exit when v_pipe_pos = 0;
                v_start := v_pipe_pos + 1;
            end loop;
        end loop;

        -- 1 .. COUNT is a valid (empty) range for an empty collection,
        -- whereas FIRST and LAST are NULL in that case.
        forall t in 1 .. v_t_insr.count
            insert into test_bbu(CODE, DESCR) values (v_t_insr(t).code, v_t_insr(t).descr);

        exit when table_data_in.count < limit_in;
    end loop;
    close c_data_in;
exception
    when others then
        -- Close the cursor explicitly before re-raising the error.
        if c_data_in%isopen then
            close c_data_in;
        end if;
        raise;
end test_bulk_collect_proc;
/
我在自己的数据库上测试了这三种方法。对于 mathguy 的 SQL 和我的流水线函数,我使用 CTAS(create table as select)进行测试;对于批量收集的过程,我只是直接执行它。
-- Benchmark of the pure-SQL approach: build test_bbu(code, descr) from
-- 1,000,000 generated strings, each split on '|' by a recursive CTE.
create table test_bbu as
with
-- One generated '|'-separated list per LEVEL.
src (raw_list) as (
    select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level
    from dual
    connect by level <= 1000000
),
-- Surround each list with pipes so every token is delimited on both sides.
padded (str) as (
    select '|' || raw_list || '|'
    from src
),
-- Advance one token per level; the *_prev / *_next columns hold the
-- underscore and pipe positions bounding the token being extracted.
walk (lvl, code, descr, str, us_prev, pipe_prev, us_next, pipe_next) as (
    select
        0,
        null,
        null,
        str,
        1,
        1,
        instr(str, '_', 1, 1),
        instr(str, '|', 1, 2)
    from padded
    union all
    select
        lvl + 1,
        substr(str, pipe_prev + 1, us_next - pipe_prev - 1),
        substr(str, us_next + 1, pipe_next - us_next - 1),
        str,
        us_next,
        pipe_next,
        instr(str, '_', pipe_next + 1, 1),
        instr(str, '|', pipe_next + 1, 1)
    from walk
    where us_next <> 0
)
select
    code,
    descr
from walk
where lvl <> 0;
-- Benchmark of the pipelined function: materialize its output with CTAS.
create table test_bbu2 as
select
    p.code,
    p.descr
from table(test_pipe_func) p;

-- Benchmark of the bulk-collect procedure: run it directly.
begin
    test_bulk_collect_proc;
end;
/
我分别用 50 万(500K)行和 100 万(1M)行测试了这三种方法。以下是我的结果,但我建议您在做出决定之前,先在自己的环境中测试一下。
500K 1M
----------------------------------------
SQL 36s 1m:15s
Pipelined 11s 23s
Bulk Collect 8s 17s