我在分隔符之间包含了一个文本oracle。如果可能,请帮助创建文本的正则表达式。我有一个文字的例子
12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||
直到现在我才能拿到:
||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!
使用此(\|\|(.*))+([^\|\|])
。
但我需要将这些数据与||分开!!然后从!!分开。之后我需要将它保存到这样的数组中:
array [1] =(123,word1,word2,word3)
array [2] =(789,word4,word5,word6)
array [3] =(2345,word7,word8,890)
答案 0 :(得分:1)
这个应该有效:
-- Splits the sample string on "||" and prints every "!!"-delimited group as a
-- comma-separated list, one row per group.
WITH sample_text AS (
    SELECT '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||' AS t
    FROM dual
)
SELECT
    -- Token 1 is the prefix in front of the first "||", so groups start at
    -- level 2 and are renumbered from 1.
    level - 1 AS id,
    -- Turn every "!!" into a comma, then strip the leading/trailing comma left
    -- by the "!!" at both ends of the group.
    TRIM(BOTH ',' FROM
        REGEXP_REPLACE(
            REGEXP_SUBSTR(t, '[^\|]+', 1, level),
            '!!',
            ','
        )
    ) AS array
FROM sample_text
WHERE level > 1
-- Generate one level per "||" occurrence in the string.
CONNECT BY level <= REGEXP_COUNT(t, '\|\|');
输出:
ID ARRAY
---------- --------------------------
1 123,word1 ,word2, word3
2 789,word4,word5 , word6
3 2345 ,word7,word8, 890
如果零件数量是常数(4)并且您希望它们位于不同的列中:
-- Splits the sample string into "||"-separated groups and exposes the four
-- "!!"-separated values of each group as separate, whitespace-trimmed columns.
WITH v1 AS (
    SELECT '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||' AS t
    FROM dual
),
v2 AS (
    -- One row per group: "!!" becomes "," and the outer commas are stripped.
    -- Token 1 is the prefix before the first "||", hence level > 1 / level - 1.
    SELECT
        level - 1 AS id,
        TRIM(',' FROM REGEXP_REPLACE(REGEXP_SUBSTR(t, '[^\|]+', 1, level), '!!', ',')) AS array
    FROM v1
    WHERE level > 1
    CONNECT BY level <= REGEXP_COUNT(t, '\|\|')
)
SELECT
    id,
    -- '(.*?)(,|$)' returns the n-th element even when an earlier element is
    -- empty; '[^,]+' would skip empty elements and shift later values into the
    -- wrong column. TRIM removes the blanks that surround values in the source
    -- text (e.g. 'word1 ', ' word3').
    TRIM(REGEXP_SUBSTR(array, '(.*?)(,|$)', 1, 1, NULL, 1)) AS val1,
    TRIM(REGEXP_SUBSTR(array, '(.*?)(,|$)', 1, 2, NULL, 1)) AS val2,
    TRIM(REGEXP_SUBSTR(array, '(.*?)(,|$)', 1, 3, NULL, 1)) AS val3,
    TRIM(REGEXP_SUBSTR(array, '(.*?)(,|$)', 1, 4, NULL, 1)) AS val4
FROM v2;
输出:
ID VAL1 VAL2 VAL3 VAL4
---------- ---------- ---------- ---------- ----------
1 123 word1 word2 word3
2 789 word4 word5 word6
3 2345 word7 word8 890
PLSQL STYLE :
-- Anonymous block: fetches the sample string(s) from cursor c1, splits each on
-- "||" into groups, splits every group on "!!" into values, and prints each
-- trimmed value with dbms_output.
declare
  type t_text_array is table of varchar2(4000);
  v_text_array t_text_array := t_text_array();
  -- Current group with "!!" replaced by "," and the outer commas stripped.
  val varchar2(4000);
  cursor c1 is
    select '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||' t from dual;
begin
  open c1;
  fetch c1 bulk collect into v_text_array;
  -- Everything is in the collection now; release the cursor explicitly.
  close c1;

  for i in 1..v_text_array.count loop
    -- Token 1 is the prefix before the first "||", so groups start at token 2.
    -- nvl keeps the loop bound non-NULL when the fetched string is NULL
    -- (a NULL bound raises ORA-06502).
    for j in 2..nvl(regexp_count(v_text_array(i),'\|\|'), 0) loop
      val := trim(',' from regexp_replace(regexp_substr(v_text_array(i),'[^\|]+',1,j),'!!',','));
      -- A missing group yields NULL; skip it instead of looping to a NULL bound.
      if val is not null then
        for k in 1..regexp_count(val,',')+1 loop
          --display to console or further process...
          -- trim drops the blanks that surround values in the source text.
          dbms_output.put_line(trim(regexp_substr(val,'[^,]+',1,k)));
        end loop;
      end if;
    end loop;
  end loop;
end;
/
答案 1 :(得分:0)
以下内容返回预期结果:
-- Splits the sample string on "||" and renders each group as "(v1,v2,v3,v4)".
WITH x AS (
    SELECT '2322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||' AS str
    FROM dual
),
y AS (
    -- One row per "|"-free token. Inside a bracket expression "||" is just the
    -- single character "|", and a trailing "[!!]*" can never match anything
    -- after the greedy "[^|]+", so the plain pattern '[^|]+' is equivalent to
    -- the former '[^||]+[!!]*' and says what it does.
    -- Token 1 is the prefix before the first "||" and is dropped by level > 1.
    SELECT REGEXP_SUBSTR(str, '[^|]+', 1, level) AS str
    FROM x
    WHERE level > 1
    CONNECT BY REGEXP_SUBSTR(str, '[^|]+', 1, level) IS NOT NULL
)
SELECT
    -- Innermost first: leading "!!" -> "(", trailing "!!" -> ")", then every
    -- remaining "!!" (with any surrounding spaces) -> ",".
    REGEXP_REPLACE(
        REGEXP_REPLACE(
            REGEXP_REPLACE(str, '^!!', '('),
            '!!$', ')'),
        '[ ]*!![ ]*', ',') AS str
FROM y;
答案 2 :(得分:0)
您需要按照here所述的分隔符应用两次拆分。 最后使用LISTAGG再次获取值(word),并使用一些字符串连接进行最终确定。
我提供了一个包含两个输入记录的完整示例,因此可以扩展任意数量的已解析行。
您可能需要调整限制分割数量的T2
表格。如果您的关键字中包含NULL值,则还需要一些特殊处理。
查询 - 在下面评论
-- Parses every input row into "||"-separated groups and prints each group as
-- "array[n]= (v1, v2, ...)" with whitespace-trimmed values.
WITH t1 AS (
    -- Sample input: one delimited string per id.
    SELECT 1 AS id,
           '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!|| ' AS col
    FROM dual
    UNION ALL
    SELECT 2 AS id,
           '22222ACCCC12Y||!!567!!word21 !!word22!! word23!!||!!789!!word24!!word25 !! word26!!||!!2345 !!word27!!word28!! 890!!|| ' AS col
    FROM dual
),
t2 AS (
    -- Row generator 1..9: the (max) number of tokens extracted per split.
    SELECT rownum AS colnum
    FROM dual
    CONNECT BY level < 10
),
t3 AS (
    -- First split: one row per "|"-free token of each input string.
    SELECT t1.id,
           t2.colnum,
           REGEXP_SUBSTR(t1.col, '[^|]+', 1, t2.colnum) AS col
    FROM t1
    CROSS JOIN t2
    WHERE REGEXP_SUBSTR(t1.col, '[^|]+', 1, t2.colnum) IS NOT NULL
),
first_split AS (
    -- Keep only the groups that contain "!!"; this drops the leading prefix
    -- and the blank token after the final "||".
    SELECT id, colnum, col
    FROM t3
    WHERE col LIKE '%!!%'
),
second_split AS (
    -- Second split: one row per "!"-free token of each group. TRIM removes the
    -- blanks that surround values in the source text (e.g. 'word1 ', ' word3').
    SELECT fs.id,
           fs.colnum AS linenum,
           t2.colnum,
           TRIM(REGEXP_SUBSTR(fs.col, '[^!]+', 1, t2.colnum)) AS col
    FROM first_split fs
    CROSS JOIN t2
    WHERE REGEXP_SUBSTR(fs.col, '[^!]+', 1, t2.colnum) IS NOT NULL
),
agg_values AS (
    -- Re-assemble the values of each group in their original order,
    -- separated by ", ".
    SELECT id,
           linenum,
           LISTAGG(col, ', ') WITHIN GROUP (ORDER BY colnum) AS val_lst
    FROM second_split
    GROUP BY id,
             linenum
)
SELECT id,
       'array['
       || ROW_NUMBER() OVER (PARTITION BY id ORDER BY linenum)
       || ']= ('
       || val_lst
       || ')' AS array_text
FROM agg_values
-- Order by the group position itself rather than by the rendered text.
ORDER BY id, linenum;
按要求收益
ID ARRAY_TEXT
1 array[1]= (123, word1, word2, word3)
1 array[2]= (789, word4, word5, word6)
1 array[3]= (2345, word7, word8, 890)
2 array[1]= (567, word21, word22, word23)
2 array[2]= (789, word24, word25, word26)
2 array[3]= (2345, word27, word28, 890)
这是 first_split 查询的结果。你打破了数据。
ID COLNUM COL
---------- ---------- ------------------------------------------
1 2 !!123!!word1 !!word2!! word3!!
1 3 !!789!!word4!!word5 !! word6!!
1 4 !!2345 !!word7!!word8!! 890!!
2 2 !!567!!word21 !!word22!! word23!!
2 3 !!789!!word24!!word25 !! word26!!
2 4 !!2345 !!word27!!word28!! 890!!
second_split 查询会破坏单词中的行。
ID LINENUM COLNUM COL
---------- ---------- ---------- --------------------------------------------------------------------------------------------------------------------------
1 2 1 123
1 2 2 word1
1 2 3 word2
1 2 4 word3
1 3 1 789
1 3 2 word4
1 3 3 word5
.....
其余的是LISTAGG获取csv关键字列表和ROW_NUMBER函数以获得不错的顺序array_ids
如果要在单独的列中提取值,请使用 PIVOT 而不是LISTAGG。缺点是您必须调整查询以获得实际的值数。
-- Parses every input row into "||"-separated groups and returns the first four
-- "!!"-separated values of each group as separate, whitespace-trimmed columns.
WITH t1 AS (
    -- Sample input: one delimited string per id.
    SELECT 1 AS id,
           '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!|| ' AS col
    FROM dual
    UNION ALL
    SELECT 2 AS id,
           '22222ACCCC12Y||!!567!!word21 !!word22!! word23!!||!!789!!word24!!word25 !! word26!!||!!2345 !!word27!!word28!! 890!!|| ' AS col
    FROM dual
),
t2 AS (
    -- Row generator 1..9: the (max) number of tokens extracted per split.
    SELECT rownum AS colnum
    FROM dual
    CONNECT BY level < 10
),
t3 AS (
    -- First split: one row per "|"-free token of each input string.
    SELECT t1.id,
           t2.colnum,
           REGEXP_SUBSTR(t1.col, '[^|]+', 1, t2.colnum) AS col
    FROM t1
    CROSS JOIN t2
    WHERE REGEXP_SUBSTR(t1.col, '[^|]+', 1, t2.colnum) IS NOT NULL
),
first_split AS (
    -- Keep only the groups that contain "!!"; this drops the leading prefix
    -- and the blank token after the final "||".
    SELECT id, colnum, col
    FROM t3
    WHERE col LIKE '%!!%'
),
second_split AS (
    -- Second split: one row per "!"-free token of each group. TRIM removes the
    -- blanks that surround values in the source text (e.g. 'word1 ', ' word3').
    SELECT fs.id,
           fs.colnum AS linenum,
           t2.colnum,
           TRIM(REGEXP_SUBSTR(fs.col, '[^!]+', 1, t2.colnum)) AS col
    FROM first_split fs
    CROSS JOIN t2
    WHERE REGEXP_SUBSTR(fs.col, '[^!]+', 1, t2.colnum) IS NOT NULL
),
pivot_values AS (
    -- Turn value positions 1..4 into columns K1_COL..K4_COL; id and linenum
    -- are the implicit grouping columns of the pivot.
    SELECT id,
           linenum,
           k1_col,
           k2_col,
           k3_col,
           k4_col
    FROM second_split PIVOT (MAX(col) col FOR (colnum) IN (1 AS "K1", 2 AS "K2", 3 AS "K3", 4 AS "K4"))
)
SELECT id,
       ROW_NUMBER() OVER (PARTITION BY id ORDER BY linenum) AS array_id,
       k1_col,
       k2_col,
       k3_col,
       k4_col
FROM pivot_values
ORDER BY id, array_id;
提供关系视图
ID ARRAY_ID K1_COL K2_COL K3_COL K4_COL
---------- ---------- -------- -------- -------- --------
1 1 123 word1 word2 word3
1 2 789 word4 word5 word6
1 3 2345 word7 word8 890
2 1 567 word21 word22 word23
2 2 789 word24 word25 word26
2 3 2345 word27 word28 890
答案 3 :(得分:0)
Oracle安装程序:
-- Sample data for the queries below: two rows, each holding an id and one
-- delimited string to be parsed.
CREATE TABLE table_name (id, value) AS
    SELECT 1,
           '12322ABCD124A||!!123!!word1 !!word2!! word3!!||!!789!!word4!!word5 !! word6!!||!!2345 !!word7!!word8!! 890!!||'
    FROM DUAL
    UNION ALL
    SELECT 2,
           '12322ABCD124A||!!321!!word1a !!word2a!! word3a!!||!!987!!word4a!!word5a !! word6a!!||!!5432 !!word7a!!word8a!! 098!!||'
    FROM DUAL;
查询1 :
-- Returns one row per "|...|" group of table_name.value, with the group's
-- "!!"-separated words collected into a SYS.ODCIVARCHAR2LIST.
SELECT
    id,
    grp_no,
    CAST(
        MULTISET(
            -- One element per word: capture group 1 of the LEVEL-th match.
            SELECT REGEXP_SUBSTR(grp.text, '!\s*([^!]+?)\s*!', 1, LEVEL, NULL, 1)
            FROM DUAL
            CONNECT BY LEVEL <= REGEXP_COUNT(grp.text, '!\s*([^!]+?)\s*!')
        ) AS SYS.ODCIVARCHAR2LIST
    ) AS words
FROM (
    -- One row per group: grp_no is the match number, text the group content
    -- between the enclosing pipes.
    SELECT
        id,
        COLUMN_VALUE AS grp_no,
        REGEXP_SUBSTR(value, '\|([^|]+)\|', 1, COLUMN_VALUE, NULL, 1) AS text
    FROM
        table_name src,
        -- Correlated row generator: the numbers 1..(groups in src.value).
        TABLE(
            CAST(
                MULTISET(
                    SELECT LEVEL
                    FROM DUAL
                    CONNECT BY LEVEL <= REGEXP_COUNT(src.value, '\|([^|]+)\|')
                ) AS SYS.ODCINUMBERLIST
            )
        )
) grp;
<强>结果:
ID GRP_NO WORDS
---------- ---------- --------------------------------------------------------
1 1 SYS.ODCIVARCHAR2LIST('123','word1','word2','word3')
1 2 SYS.ODCIVARCHAR2LIST('789','word4','word5','word6')
1 3 SYS.ODCIVARCHAR2LIST('2345','word7','word8','890')
2 1 SYS.ODCIVARCHAR2LIST('321','word1a','word2a','word3a')
2 2 SYS.ODCIVARCHAR2LIST('987','word4a','word5a','word6a')
2 3 SYS.ODCIVARCHAR2LIST('5432','word7a','word8a','098')
查询2 :
-- Returns one row per "|...|" group of table_name.value, with the group's
-- first four "!!"-separated words in separate columns.
SELECT id,
       grp_no,
       -- Lazy capture followed by \s* strips trailing as well as leading
       -- whitespace (the greedy '!\s*([^!]+)!' kept values such as 'word1 ');
       -- this is the same word pattern the collection-returning query uses.
       REGEXP_SUBSTR( t.text, '!\s*([^!]+?)\s*!', 1, 1, NULL, 1 ) AS Word1,
       REGEXP_SUBSTR( t.text, '!\s*([^!]+?)\s*!', 1, 2, NULL, 1 ) AS Word2,
       REGEXP_SUBSTR( t.text, '!\s*([^!]+?)\s*!', 1, 3, NULL, 1 ) AS Word3,
       REGEXP_SUBSTR( t.text, '!\s*([^!]+?)\s*!', 1, 4, NULL, 1 ) AS Word4
FROM (
  -- One row per group: grp_no is the match number, text the group content
  -- between the enclosing pipes.
  SELECT id,
         COLUMN_VALUE AS grp_no,
         REGEXP_SUBSTR( value, '\|([^|]+)\|', 1, COLUMN_VALUE, NULL, 1 ) AS text
  FROM table_name t,
       -- Correlated row generator: the numbers 1..(groups in t.value).
       TABLE(
         CAST(
           MULTISET(
             SELECT LEVEL
             FROM DUAL
             CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '\|([^|]+)\|' )
           )
           AS SYS.ODCINUMBERLIST
         )
       )
) t;
<强>结果:
ID GRP_NO WORD1 WORD2 WORD3 WORD4
---- ------ ------- ------- ------- -------
1 1 123 word1 word2 word3
1 2 789 word4 word5 word6
1 3 2345 word7 word8 890
2 1 321 word1a word2a word3a
2 2 987 word4a word5a word6a
2 3 5432 word7a word8a 098