表中只有一个文本类型的列。文本包含一行或多行,每行包含一个或多个"部分"(section),每个部分由同一个字母连续重复构成。目标是编写一个查询,为每个部分返回一行结果,包含该部分的起始位置、结束位置和值(即构成它的字母)。
-- Sample fixture: a single text column holding several lines; each line
-- contains runs ("sections") of one repeated letter separated by spaces.
-- The number of spaces between runs is significant: the expected
-- section_start / section_end values are 1-based column positions within
-- each line (e.g. B occupies columns 31-34 of the first text line and
-- Q occupies columns 60-61 of the last one), so the padding below must
-- not be collapsed.
-- The literal intentionally starts and ends with a line break.
create table t (txt varchar (1000));
insert into t (txt) values
(
'
AAAAAAAAAAAAAAAAAAAAAAAAAAAA  BBBB    CCCCCCCCCCCCCCCCCCCCCCCCC
DDDE  FFFFFFFF    GGGGGGGGG               HHHHHHHH    IIIIIII
JJ      KKKLLL       MM NN                              OOOOO
            P                                              QQ
'
)
;
* 只需要最后 3 列(section_start / section_end / section_val),其余各列仅用于调试。
line_ind section_ind section_length section_start section_end section_val
1 1 28 1 28 A
1 2 4 31 34 B
1 3 25 39 63 C
2 1 3 1 3 D
2 2 1 4 4 E
2 3 8 7 14 F
2 4 9 19 27 G
2 5 8 43 50 H
2 6 7 55 61 I
3 1 2 1 2 J
3 2 3 9 11 K
3 3 3 12 14 L
3 4 2 22 23 M
3 5 2 25 26 N
3 6 5 57 61 O
4 1 1 13 13 P
4 2 2 60 61 Q
答案 0 :(得分:0)
-- Lists every section (run of one repeated non-blank character) per line,
-- with its index, length, start/end position inside the line and its letter.
-- Uses the regexp_split_to_table table function with a RETURNS clause.
WITH l AS
(
    -- One row per line: the text is split on '\r'; the constant -1 is only
    -- the pass-through key column that the table function requires.
    -- NOTE(review): t is referenced without appearing in a FROM clause;
    -- this relies on the engine resolving it implicitly - confirm.
    SELECT
        line_ind,
        line
    FROM TABLE
    (
        regexp_split_to_table (-1, t.txt, '\r', '')
        RETURNS (minus_one INT, line_ind INT, line VARCHAR(1000))
    ) AS s
)
SELECT
    l.line_ind,
    r.section_ind,
    char_length (r.section) AS section_length,
    -- Position of the first character of the section_ind-th run.
    regexp_instr (l.line, '(\S)\1*', 1, r.section_ind, 0) AS section_start,
    -- Position just past the run, minus one = position of its last character.
    regexp_instr (l.line, '(\S)\1*', 1, r.section_ind, 1) - 1 AS section_end,
    substr (r.section, 1, 1) AS section_val
FROM TABLE
(
    -- The regexp_replace inserts a blank at every position where the
    -- preceding character differs from the following one, so adjacent runs
    -- such as KKKLLL become separate tokens; the result is split on blanks.
    regexp_split_to_table (
        l.line_ind,
        regexp_replace (l.line, '(?<=(?P<c>.))(?!(?P=c))', ' '),
        '\s+',
        ''
    )
    RETURNS (line_ind INT, section_ind INT, section VARCHAR(1000))
) AS r,
    -- NOTE(review): comma join kept on purpose; the table function takes its
    -- arguments from l in the same FROM clause - confirm before converting
    -- this to an ANSI JOIN.
    l
WHERE l.line_ind = r.line_ind
ORDER BY
    l.line_ind,
    r.section_ind
;
-- Oracle variant: one output row per section (run of one repeated
-- non-blank character) found anywhere in txt.
--   section_start / section_end : 1-based positions relative to the start
--                                 of the line the section is on
--   section_val                 : the repeated character
-- The line-relative offset is obtained by subtracting the position of the
-- closest line feed before the section: instr is called with a negative
-- start position so that it searches backwards from the section start.
-- It returns 0 when there is no preceding line feed, which leaves the
-- absolute position unchanged.
select regexp_instr (txt,'(\S)\1*',1,level,0) - instr (txt,chr(10),regexp_instr (txt,'(\S)\1*',1,level,0) - length (txt) - 1,1) as section_start
,regexp_instr (txt,'(\S)\1*',1,level,1) - 1 - instr (txt,chr(10),regexp_instr (txt,'(\S)\1*',1,level,0) - length (txt) - 1,1) as section_end
,regexp_substr (txt,'(\S)\1*',1,level,'',1) as section_val
from t
connect by level <= regexp_count (txt,'(\S)\1*')
-- Keep each hierarchy inside its own source row. Without these two
-- conditions every row becomes a child of every other row, so a table
-- with more than one row produces an exploding number of duplicates.
-- prior sys_guid() is never null and differs on each evaluation, which
-- stops Oracle from reporting a CONNECT BY loop.
and prior rowid = rowid
and prior sys_guid() is not null
-- Deterministic output: sections in order of appearance within each row.
order by rowid
,level
;
答案 1 :(得分:0)
即使表中有多行输入数据,下面的查询也同样适用:
-- Step 1 (line_rows): walk through txt one line at a time.
--   line    : the text of the current line ('.' does not cross a line break)
--   pos     : position just past the current line, used as the restart point
--   line_no : running line counter per source row, starting at 1
-- Step 2 (section_rows): walk through each line one section at a time, where
--   a section is a run of one repeated non-blank character.
with line_rows ( txt, id, line, pos, line_no ) as (
    -- Anchor: first line of every source row.
    select src.txt
         , src.id
         , regexp_substr( src.txt, '.*', 1, 1 )
         , regexp_instr( src.txt, '.*', 1, 1, 1 )
         , 1
      from t src
    union all
    -- Step: continue one character past the previous line's end.
    select prev.txt
         , prev.id
         , regexp_substr( prev.txt, '.*', prev.pos + 1, 1 )
         , regexp_instr( prev.txt, '.*', prev.pos + 1, 1, 1 )
         , prev.line_no + 1
      from line_rows prev
     where prev.pos > 0
),
section_rows ( id, line, line_no, section_start, section_end, section_value ) as (
    -- Anchor: first section of every non-empty line.
    select lr.id
         , lr.line
         , lr.line_no
         , regexp_instr( lr.line, '(\S)\1*', 1, 1, 0 )
         , regexp_instr( lr.line, '(\S)\1*', 1, 1, 1 ) - 1
         , regexp_substr( lr.line, '(\S)\1*', 1, 1, null, 1 )
      from line_rows lr
     where lr.pos > 0
       and lr.line is not null
    union all
    -- Step: search again right after the previous section. When nothing is
    -- found regexp_instr yields 0, so section_end becomes -1 and the
    -- recursion stops for that line.
    select prev.id
         , prev.line
         , prev.line_no
         , regexp_instr( prev.line, '(\S)\1*', prev.section_end + 1, 1, 0 )
         , regexp_instr( prev.line, '(\S)\1*', prev.section_end + 1, 1, 1 ) - 1
         , regexp_substr( prev.line, '(\S)\1*', prev.section_end + 1, 1, null, 1 )
      from section_rows prev
     where prev.section_end > 0
)
-- Drop the terminating "no match" rows and present sections in reading order.
select id
     , line_no
     , section_start
     , section_end
     , section_value
  from section_rows
 where section_end > 0
 order by id
        , line_no
        , section_start
因此,对于以下输入数据(添加了 id 列,以便区分各条文本):
-- Sample fixture with an id column so that several text values can be told
-- apart in the result.
-- Row 1 is a multi-line text, row 2 a single-line text. The number of spaces
-- between the letter runs is significant: the reported section_start /
-- section_end values are 1-based column positions within each line
-- (e.g. Q at 60-61 in row 1, V at 15 and W at 17 in row 2), so the padding
-- must not be collapsed.
-- The first literal intentionally starts and ends with a line break.
create table t (id NUMBER(5,0), txt varchar (1000));
insert into t (id, txt) values
(
1,
'
AAAAAAAAAAAAAAAAAAAAAAAAAAAA  BBBB    CCCCCCCCCCCCCCCCCCCCCCCCC
DDDE  FFFFFFFF    GGGGGGGGG               HHHHHHHH    IIIIIII
JJ      KKKLLL       MM NN                              OOOOO
            P                                              QQ
'
);
insert into t (id, txt) values ( 2, 'RRRSTT UUU    V WXYZ' );
这将输出:
ID | LINE_NO | SECTION_START | SECTION_END | SECTION_VALUE
-: | ------: | ------------: | ----------: | :------------
 1 |       2 |             1 |          28 | A
 1 |       2 |            31 |          34 | B
 1 |       2 |            39 |          63 | C
 1 |       3 |             1 |           3 | D
 1 |       3 |             4 |           4 | E
 1 |       3 |             7 |          14 | F
 1 |       3 |            19 |          27 | G
 1 |       3 |            43 |          50 | H
 1 |       3 |            55 |          61 | I
 1 |       4 |             1 |           2 | J
 1 |       4 |             9 |          11 | K
 1 |       4 |            12 |          14 | L
 1 |       4 |            22 |          23 | M
 1 |       4 |            25 |          26 | N
 1 |       4 |            57 |          61 | O
 1 |       5 |            13 |          13 | P
 1 |       5 |            60 |          61 | Q
 2 |       1 |             1 |           3 | R
 2 |       1 |             4 |           4 | S
 2 |       1 |             5 |           6 | T
 2 |       1 |             8 |          10 | U
 2 |       1 |            15 |          15 | V
 2 |       1 |            17 |          17 | W
 2 |       1 |            18 |          18 | X
 2 |       1 |            19 |          19 | Y
 2 |       1 |            20 |          20 | Z
在线演示见 db<>fiddle(此处)