我有这样的数据集(见下文),我尝试提取{variable_number_of_digits} {hyphen} {only_one_digit}形式的数字:
-- Asker's attempt, reproduced with its behavior unchanged.
-- Sample data: four slash-delimited address strings.
WITH mcte AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' AS addr FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' AS addr FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' AS addr FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' AS addr FROM dual
)
SELECT
    addr,
    -- num1: subexpression 2 of the first match, i.e. the run of
    -- bracket-class characters that directly follows a slash.
    REGEXP_SUBSTR(addr, '(\/)([0-9-]+)', 1, 1, NULL, 2) AS num1,
    -- num2: subexpression 3 of the first match, i.e. the run that follows
    -- a slash, one further segment, and another slash.
    REGEXP_SUBSTR(addr, '(\/)([^\/]+\/)([0-9\-]+)', 1, 1, NULL, 3) AS num2
FROM mcte
;
我没有得到正确的结果集,期望的结果应该如下:
+-------------------------------------+-----------+-----------+
| ADDR | NUM1 | NUM2 |
+-------------------------------------+-----------+-----------+
| ILLD/ELKJS/00000000/ELKJS/FHSH | NULL | NULL |
| ILLD/EFECTE/0116988-7-002/ADFA/ADFG | NULL | NULL |
| IIODK/1573230-0/2216755-7/ | 1573230-0 | 2216755-7 |
| IIODK/1573230-0/2216755-700/WRITE | 1573230-0 | NULL |
+-------------------------------------+-----------+-----------+
如何实现这一目标?
答案 0 :(得分:2)
如果您想从第二个和第三个以 `/` 分隔的组中获取结果,那么:
**输出**:
-- Positional extraction: return the 2nd and 3rd slash-delimited segments,
-- but only when the whole segment has the form <digits>-<single digit>.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
)
SELECT
    addr,
    -- Skip the first segment, then capture the second one (subexpression 1).
    -- The segment may end with a slash OR at end of string, so a number in
    -- the last position without a trailing slash is still found.
    REGEXP_SUBSTR(addr, '^[^/]*/(\d+-\d)(/|$)', 1, 1, NULL, 1) AS num1,
    -- Same idea, skipping two leading segments to capture the third one.
    REGEXP_SUBSTR(addr, '^[^/]*/[^/]*/(\d+-\d)(/|$)', 1, 1, NULL, 1) AS num2
FROM mcte;
**更新**:
如果你只想要匹配的第一个和第二个模式而不关心它们在字符串中的位置,那么:
ADDR NUM1 NUM2
-------------------------------------- ------------------- -------------------
ILLD/ELKJS/00000000/ELKJS/FHSH
ILLD/EFECTE/0116988-7-002/ADFA/ADFG
IIODK/1573230-0/2216755-7/ 1573230-0 2216755-7
IIODK/1573230-0/2216755-700/WRITE 1573230-0
IIODK/TEST/1573230-0/2216755-700/WRITE 1573230-0
**输出**:
-- Position-independent extraction: return the first and second segments of
-- the form <digits>-<single digit>, wherever they occur in the string.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT '1234567-8' FROM dual
    UNION ALL
    SELECT '1234567-8/9876543-2' FROM dual
    UNION ALL
    SELECT '1234567-8/TEST/9876543-2' FROM dual
)
SELECT
    addr,
    -- First segment bounded by (start or slash) and (slash or end);
    -- the number itself is subexpression 2.
    REGEXP_SUBSTR(addr, '(^|/)(\d+-\d)(/|$)', 1, 1, NULL, 2) AS num1,
    -- A first matching segment, optionally followed by other segments
    -- (matched lazily), then a second matching segment: subexpression 3.
    REGEXP_SUBSTR(addr, '(^|/)\d+-\d(/.+?)?/(\d+-\d)(/|$)', 1, 1, NULL, 3) AS num2
FROM mcte;
答案 1 :(得分:0)
“我尝试提取 {variable_number_of_digits} {hyphen} {only_one_digit} 形式的数字”
要匹配此格式的数字,您应该执行以下操作。
正则表达式: \/\d+-\d
**Regex101 Demo**
答案 2 :(得分:0)
将分隔符拆分查询(delimiter split query)与 `REGEXP_LIKE` 和 `PIVOT` 结合使用——通过此查询,每条记录最多可以得到 6 个数字。如果每条记录需要处理更多的数字,您需要同时更新 `cols` 子查询和 `PIVOT` 列表。(不幸的是,在静态 SQL 中无法自动做到这一点。)
-- Split each address on '/', keep only segments of the form
-- <digits>-<single digit>, and pivot them into columns NUM1..NUM6.
WITH mcte AS (
    SELECT 1 AS id, 'ILLD/ELKJS/00000000/ELKJS/FHSH' AS addr FROM dual
    UNION ALL
    SELECT 2 AS id, 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' AS addr FROM dual
    UNION ALL
    SELECT 3 AS id, 'IIODK/1573230-0/2216755-7/' AS addr FROM dual
    UNION ALL
    SELECT 4 AS id, '1-1/1573230-0/2216755-700/676-7' AS addr FROM dual
),
-- Row generator: one row per segment position to inspect (1..6).
-- The upper bound is inclusive so it agrees with the six PIVOT columns below;
-- raise both together to handle more numbers per record.
cols AS (
    SELECT level AS colnum
    FROM dual
    CONNECT BY level <= 6 /* (max) number of columns */
),
-- One row per (id, position) holding the non-empty segment at that position.
mcte2 AS (
    SELECT
        m.id,
        c.colnum,
        REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) AS addr
    FROM mcte m
    CROSS JOIN cols c
    WHERE REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) IS NOT NULL
),
-- Keep only segments that are entirely <digits>-<digit> and renumber them
-- per id in their original left-to-right order.
mcte3 AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY colnum) AS col_no,
        addr
    FROM mcte2
    WHERE REGEXP_LIKE(addr, '^[0-9]+-[0-9]$')
)
-- SELECT * is intentional: the output columns are defined by the PIVOT list.
-- Ids with no matching segment do not appear in the result.
SELECT *
FROM mcte3
PIVOT (
    MAX(addr) FOR (col_no) IN (
        1 AS "NUM1",
        2 AS "NUM2",
        3 AS "NUM3",
        4 AS "NUM4",
        5 AS "NUM5",
        6 AS "NUM6"
    )
)
ORDER BY id;
这给出了结果
ID NUM1 NUM2 NUM3 NUM4 NUM5 NUM6
---------- ---------- ---------- ---------- ---------- ---------- ----------
3 1573230-0 2216755-7
4 1-1 1573230-0 676-7