我有一个具有以下结构的表:
表1
f_name f_content
test1.txt |0002434299|354534|535345345|05|||BCV RESULT # 174|Test 12%|
test2.txt |543566677|HTTYE|9w5w RRLL|05|||BBN RESULT # 144|Test 15#%|3
我需要按管道符(|)拆分 f_content,并记录每个子字符串所在的位置。
输出表是:
f_name position value
test1.txt 1 (null)
test1.txt 2 0002434299
test1.txt 3 354534
test1.txt 4 535345345
test1.txt 5 05
test1.txt 6 (null)
test1.txt 7 (null)
test1.txt 8 BCV RESULT # 174
test1.txt 9 Test 12%
test1.txt 10 (null)
test2.txt 1 (null)
test2.txt 2 543566677
test2.txt 3 HTTYE
test2.txt 4 9w5w RRLL
test2.txt 5 05
test2.txt 6 (null)
test2.txt 7 (null)
test2.txt 8 BBN RESULT # 144
test2.txt 9 Test 15#%
test2.txt 10 3
表1中有超过50万条记录,每条记录包含200多个管道符。
有没有办法编写一个优化的查询,使其能够处理这50万条、每条含200多个管道符的记录,而不会占满撤消(undo)表空间?
是否可以将SQL查询编写为以块的形式处理并将其继续插入输出表中?
答案 0 :(得分:2)
您不需要(慢速)正则表达式,并且可以使用简单的字符串函数来做到这一点:
Oracle设置:
-- Sample source table: one row per file, with the raw pipe-delimited line
-- stored in f_content. Column names come from the CTAS column list.
CREATE TABLE table1 ( f_name, f_content ) AS
SELECT 'test1.txt', '|0002434299|354534|535345345|05|||BCV RESULT # 174|Test 12%|' FROM DUAL UNION ALL
SELECT 'test2.txt', '|543566677|HTTYE|9w5w RRLL|05|||BBN RESULT # 144|Test 15#%|3' FROM DUAL;

-- Target table: one row per (file, field position) pair.
CREATE TABLE output_table (
  f_name   VARCHAR2(20),
  position NUMBER(4,0),
  -- Sized generously: a real field may be far longer than the 50 characters
  -- the sample rows need, and a too-small column makes the INSERT fail.
  value    VARCHAR2(4000)
);
插入语句:
-- Split every table1.f_content into its pipe-delimited fields using only
-- INSTR/SUBSTR (no regular expressions) and store one row per field.
--
-- field_bounds walks each string left to right with a recursive WITH clause:
--   field_no  : 1-based position of the field within the line
--   start_pos : index of the first character of the field
--   pipe_pos  : index of the '|' that ends the field, or 0 for the last field
INSERT INTO output_table ( f_name, position, value )
WITH field_bounds ( f_name, f_content, field_no, start_pos, pipe_pos ) AS (
  -- Anchor member: the first field starts at character 1.
  SELECT f_name, f_content, 1, 1, INSTR( f_content, '|', 1 )
  FROM   table1
  UNION ALL
  -- Recursive member: the next field starts right after the previous pipe.
  -- Recursion stops once no further pipe was found (pipe_pos = 0).
  SELECT f_name, f_content, field_no + 1, pipe_pos + 1, INSTR( f_content, '|', pipe_pos + 1 )
  FROM   field_bounds
  WHERE  pipe_pos > 0
)
SELECT f_name,
       field_no,
       CASE
         -- Field is closed by a pipe: take the characters in between.
         -- A zero-length SUBSTR yields NULL, which represents an empty field.
         WHEN pipe_pos > 0
         THEN SUBSTR( f_content, start_pos, pipe_pos - start_pos )
         -- Last field: everything from start_pos to the end of the string.
         ELSE SUBSTR( f_content, start_pos )
       END AS value
FROM   field_bounds;
输出:
-- Show the split result, ordered so each file's fields appear in sequence.
SELECT f_name,
       position,
       value
FROM   output_table
ORDER BY f_name, position;
| F_NAME    | POSITION | VALUE            |
| :-------- | -------: | :--------------- |
| test1.txt |        1 | null             |
| test1.txt |        2 | 0002434299       |
| test1.txt |        3 | 354534           |
| test1.txt |        4 | 535345345        |
| test1.txt |        5 | 05               |
| test1.txt |        6 | null             |
| test1.txt |        7 | null             |
| test1.txt |        8 | BCV RESULT # 174 |
| test1.txt |        9 | Test 12%         |
| test1.txt |       10 | null             |
| test2.txt |        1 | null             |
| test2.txt |        2 | 543566677        |
| test2.txt |        3 | HTTYE            |
| test2.txt |        4 | 9w5w RRLL        |
| test2.txt |        5 | 05               |
| test2.txt |        6 | null             |
| test2.txt |        7 | null             |
| test2.txt |        8 | BBN RESULT # 144 |
| test2.txt |        9 | Test 15#%        |
| test2.txt |       10 | 3                |
db<>fiddle 演示见此处
答案 1 :(得分:1)
您可以将 regexp_substr() 函数
与 connect by level <= regexp_count(f_content,'\|') + 1 结合使用:
-- Split each f_content into pipe-delimited fields with REGEXP_SUBSTR,
-- generating one row per field via CONNECT BY LEVEL.
with t(f_name,f_content) as
(
 select 'test1.txt','|0002434299|354534|535345345|05|||BCV RESULT # 174|Test 12%|'
   from dual
 union all
 select 'test2.txt','|543566677|HTTYE|9w5w RRLL|05|||BBN RESULT # 144|Test 15#%|3'
   from dual
)
select f_name,
       level as position,
       -- Each occurrence is "field text, then a pipe or end of string";
       -- subexpression 1 returns only the field text. Empty fields come back
       -- as NULL and spaces inside a field are kept as they are.
       -- The bracket expression contains no backslash, so only the pipe
       -- character ends a field.
       regexp_substr(f_content,
                     '([^|]*)(\||$)',
                     1,
                     level,
                     null,
                     1
                    ) as value
  from t
 -- A line with N pipes has N + 1 fields.
connect by level <= regexp_count(f_content,'\|') + 1
       -- Keep the row generation inside one source row; this relies on
       -- f_name identifying a row uniquely.
       and prior f_name = f_name
       -- Non-deterministic call that stops CONNECT BY from reporting a loop.
       and prior sys_guid() is not null;