从 CSV 文件中读取数据后,我得到如下格式的字符串:
29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX
我只想把它转换成一个恰好包含 6 个值的数组;
其中 Inc 之前的逗号位于双引号内,需要转义,不能当作分隔符。
有人能建议在 PL/SQL 中实现这一点的最佳方法吗?
答案 0 :(得分:3)
这与this question类似,但您的列表中有空元素;我在那里试过的其中一种模式的简单翻译会跳过这些:
-- SQL*Plus bind variable holding the sample CSV line. The literal below is
-- 58 characters long, so the variable must be wider than varchar2(50);
-- otherwise the assignment fails with ORA-06502 (buffer too small).
var v_lastline varchar2(200);
exec :v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';
-- Naive tokeniser: each match is either a double-quoted run or a run of
-- non-comma characters. Because [^,]+ needs at least one character, empty
-- fields (",,") produce no match and are skipped -- this query is kept as-is
-- to demonstrate that limitation.
select level as lvl,
  regexp_substr(:v_lastline, '("[^"]*"|[^,]+)', 1, level) as element
from dual
connect by level <= regexp_count(:v_lastline, '("[^"]*"|[^,]+)');
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4 CLS
5 FAX
如果你能找到一个绝不会出现在数据中的特殊字符,就可以这样解决:先把每个逗号替换为"逗号+该字符",使空元素也变成非空的标记;拆分之后再把该字符删除:
-- Split :v_lastline into one row per CSV field, keeping empty fields.
-- Every comma is first followed by the marker character '§' so that an empty
-- field still contains one character to match; the marker is removed again
-- from each extracted token.
with marked as (
  select replace(:v_lastline, ',', ',§') as csv_line
  from dual
)
select level as lvl,
       replace(regexp_substr(csv_line, '(§"[^"]*"|[^,]+)', 1, level),
               '§', null) as element
from marked
connect by regexp_substr(csv_line, '(§"[^"]*"|[^,]+)', 1, level) is not null;
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4 CLS
5
6 FAX
这是对拆分分隔字符串的常用方法(which is explained in detail here)的扩展。
replace(:v_lastline, ',', ',§')
将 ...,CLS,,FAX
更改为 ...,§CLS,§,§FAX
,其中 §
是一个在数据中永远不会出现的字符。regexp_substr(..., '(§"[^"]*"|[^,]+)', 1, level)
用正则表达式对替换后的值进行分词,该正则表达式匹配任何由双引号括起的值(现在其前面还带有特殊字符)或一串非逗号字符;由于两个分支按先后顺序尝试,引号内的逗号会被忽略。level
是分层查询语法的一部分,其中 connect by regexp_substr(<same value and pattern>) is not null
仅用于确定共有多少个标记。replace(regexp_substr(...), '§', null)
删除第一步中加入的特殊字符。之后,你可以再套一层 replace()
来删除双引号,并根据需要修剪空格。
你没有说明"数组"具体指什么,但你可以在 PL/SQL 中运行该查询,并用 bulk collect 把结果收集到一个集合中(如果这正是你想要的)。例如,使用内置的 ODCIVARCHAR2LIST 集合类型:
-- Demo: split one CSV line into a PL/SQL collection and print every element.
set serveroutput on
declare
  -- The sample line is 58 characters long; varchar2(50) would raise
  -- ORA-06502 on the assignment below, so size the buffer generously.
  v_lastline varchar2(4000);
  v_array sys.odcivarchar2list;
begin
  v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';
  -- Use the local variable (not a SQL*Plus bind variable) so the block is
  -- self-contained. Each comma gets the marker '§' appended so empty fields
  -- still match; the marker and the double quotes are then stripped and the
  -- result is trimmed.
  select trim(replace(replace(
           regexp_substr(replace(v_lastline, ',', ',§'),
                         '(§"[^"]*"|[^,]+)', 1, level), '§', null), '"', null))
  bulk collect into v_array
  from dual
  connect by regexp_substr(replace(v_lastline, ',', ',§'),
                           '(§"[^"]*"|[^,]+)', 1, level) is not null;
  dbms_output.put_line('Number of elements: ' || v_array.count);
  for i in 1..v_array.count loop
    dbms_output.put_line('Index ' || i || ' has: ' || v_array(i));
  end loop;
end;
/
Number of elements: 6
Index 1 has: 29218368
Index 2 has: 8062115
Index 3 has: Benedict Canyon Equities, Inc
Index 4 has: CLS
Index 5 has:
Index 6 has: FAX
对于多个空元素,这也(现在)有效:
-- Re-declare the bind variable wide enough for this longer sample line; a
-- varchar2(50) variable cannot hold it and the exec would fail with ORA-06502.
var v_lastline varchar2(200);
exec :v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",,,,,,,CLS,,,,,FAX,,,,,,,,,,,,,,,,,,INVOICE';
-- Same marker-based split as before: '§' is appended to every comma so runs
-- of consecutive commas yield one (empty) element each.
select level as lvl,
  replace(regexp_substr(replace(:v_lastline, ',', ',§'),
    '(§"[^"]*"|[^,]+)', 1, level), '§', null) as element
from dual
connect by regexp_substr(replace(:v_lastline, ',', ',§'),
  '(§"[^"]*"|[^,]+)', 1, level) is not null;
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4
...
9
10 CLS
11
...
14
15 FAX
16
...
32
33 INVOICE
答案 1 :(得分:0)
如果 CSV 的结构是固定的,你可以尝试类似下面的写法:
-- Splits one CSV line whose layout is known in advance: two plain fields, one
-- double-quoted field, then three more plain fields. CONNECT BY generates six
-- rows and each LEVEL extracts its field with a field-specific expression.
with text(text) as ( select '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX' from dual)
select level,
-- Strip the delimiter commas that the patterns below capture along with the value.
trim(',' from
case
-- Fields 1-2: the LEVEL-th match of "characters followed by a comma", searching
-- from the start of the line.
-- NOTE(review): '.*??' is presumably meant as a non-greedy match -- confirm.
when level in (1,2) then
regexp_substr(text, '(.*??)\,', 1, level)
-- Field 3: the first double-quoted run; the surrounding quotes are kept.
when level = 3 then
regexp_substr(text, '"(.*??)"', 1, 1)
-- Fields 4-5: same comma pattern, but the search starts at the second double
-- quote in the line and takes occurrence LEVEL - 2.
when level in (4,5) then
regexp_substr(text, '(.*??)\,', instr(text, '"', 1, 2), level -2)
-- Field 6: third occurrence of "comma followed by non-comma characters",
-- again searching from the second double quote.
when level = 6 then
regexp_substr(text, '\,([^\,]*)', instr(text, '"', 1, 2), 3)
end
)
from text
-- Exactly six rows, one per expected field.
connect by level <= 6
这种写法对每个部分分别处理,因此对 CSV 的结构做了很强的假设;不过在我看来,很难找到一个真正通用的解决方案。
答案 2 :(得分:0)
这是一个没有正则表达式的解决方案,首先创建两个辅助函数
/* CAR: returns the head of a delimited list, i.e. everything before the
   first occurrence of PI_SEPARATOR. When the separator does not occur
   (or INSTR yields NULL) the input string is returned unchanged.
   select car('hello,world,bla') from dual --> hello */
create or replace function car(PI_STR in varchar2,
                               PI_SEPARATOR in varchar2 default ',')
  return varchar2
is
  l_sep_at number := instr(PI_STR, PI_SEPARATOR);
begin
  return case
           when l_sep_at > 0 then substr(PI_STR, 1, l_sep_at - 1)
           else PI_STR
         end;
end;
/* CDR: returns the tail of a delimited list, i.e. everything after the
   first occurrence of PI_SEPARATOR. When the separator does not occur
   (or INSTR yields NULL) an empty string is returned.
   select cdr('hello,world,bla') from dual --> world,bla */
create or replace function cdr(PI_STR in varchar2,
                               PI_SEPARATOR in varchar2 default ',')
  return varchar2
is
  l_sep_at number := instr(PI_STR, PI_SEPARATOR);
begin
  -- Guard clause: nothing follows when there is no separator.
  if nvl(l_sep_at, 0) <= 0 then
    return '';
  end if;
  return substr(PI_STR, l_sep_at + length(PI_SEPARATOR));
end;
现在:按 ',' 逐项提取;如果某一项中出现了转义字符,就把它与后续内容连接起来,直到遇到下一个转义字符为止:
-- Collection type returned by get_columns: one element per CSV field.
create or replace type csv_col is table of varchar2(4000);
/

/*
 * GET_COLUMNS: splits one delimited line into its fields, one row per field.
 *
 * PI_STR       the line to split; NULL/empty input produces no rows
 * PI_SEPARATOR the field separator (may be longer than one character);
 *              when NULL the whole line is returned as a single field
 * PI_ESC_CHAR  the quoting string; a field that starts with it runs up to
 *              the next occurrence of it, separators inside are kept as data
 *              and the surrounding quotes are removed
 *
 * Returns a pipelined csv_col. Empty fields, including trailing ones, are
 * emitted as NULL rows.
 *
 * Limitations: a quote is only recognised at the very start of a field,
 * doubled quotes inside a quoted field are not treated as an escaped quote,
 * and any text between a closing quote and the next separator is discarded.
 */
create or replace function get_columns(PI_STR       in varchar2,
                                       PI_SEPARATOR in varchar2,
                                       PI_ESC_CHAR  in varchar2)
  return csv_col pipelined
is
  l_len     pls_integer := nvl(length(PI_STR), 0);
  l_sep_len pls_integer := nvl(length(PI_SEPARATOR), 0);
  l_esc_len pls_integer := nvl(length(PI_ESC_CHAR), 0);
  l_pos     pls_integer := 1;  -- first character of the current field
  l_end     pls_integer;       -- position of the separator ending the field, 0 if none
begin
  if l_len = 0 then
    return;
  end if;

  loop
    if l_esc_len > 0 and substr(PI_STR, l_pos, l_esc_len) = PI_ESC_CHAR then
      -- Quoted field: look for the closing quote after the opening one.
      l_end := instr(PI_STR, PI_ESC_CHAR, l_pos + l_esc_len);
      if l_end > 0 then
        pipe row(substr(PI_STR, l_pos + l_esc_len, l_end - l_pos - l_esc_len));
        -- Continue with the first separator after the closing quote.
        l_pos := l_end + l_esc_len;
        l_end := nvl(instr(PI_STR, PI_SEPARATOR, l_pos), 0);
      else
        -- Unterminated quote: the rest of the line is the field value.
        pipe row(substr(PI_STR, l_pos + l_esc_len));
        l_end := 0;
      end if;
    else
      -- Plain field: runs up to the next separator or the end of the line.
      -- NVL covers a NULL separator, for which INSTR returns NULL.
      l_end := nvl(instr(PI_STR, PI_SEPARATOR, l_pos), 0);
      if l_end > 0 then
        pipe row(substr(PI_STR, l_pos, l_end - l_pos));
      else
        pipe row(substr(PI_STR, l_pos));
      end if;
    end if;

    exit when l_end = 0;
    l_pos := l_end + l_sep_len;
  end loop;

  return;
end;
/
这样调用:
select *
from table(get_columns('29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX',
',',
'"'));
--> 结果
29218368
8062115
Benedict Canyon Equities, Inc
CLS
FAX