我有一个SQL表:
CREATE TABLE "text_files"
( "FILE_NAME" VARCHAR2(4000 BYTE),
"FILE_CONTENT" CLOB
);
我正在尝试运行以下查询:
WITH rsqfc ( file_content, line, rn, max_rn, nm1, prv, depth ) AS (
SELECT file_content,
REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, 1 ),
1,
REGEXP_COUNT( file_content, '.+?(' || CHR(10) || '|$)' ) - 1,
CASE SUBSTR( file_content, 1, 4 ) WHEN 'NM1*' THEN 1 ELSE 0 END,
CASE SUBSTR( file_content, 1, 4 ) WHEN 'PRV*' THEN 1 ELSE 0 END,
CASE SUBSTR( file_content, 1, 4 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE 0 END
FROM text_files
UNION ALL
SELECT file_content,
REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ),
rn + 1,
max_rn,
CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'NM1*' THEN nm1 + 1 ELSE nm1 END,
CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'PRV*' THEN prv + 1 ELSE prv END,
CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE depth END
FROM rsqfc
WHERE rn < max_rn
)
SELECT CASE depth
WHEN 2 THEN '2010A' || CHR( 64 + prv )
WHEN 1 THEN '1000' || CHR( 64 + nm1 )
WHEN 0 THEN '000'
END AS "LOOP",
line
FROM rsqfc;
但是它给出了
ORA-00932:数据类型不一致:预期的CLOB出现CHAR错误
我知道我们不能对正则表达式和where子句使用CLOB。但是CLOB
中的内容超过4000个字符,当我尝试用VARCHAR2(4000)
字段更改表并将CLOB
复制到新字段时,不会复制。
它给了我
ORA-22835:缓冲区太小,无法从CLOB转换为CHAR或从BLOB转换为RAW(实际:4436,最大:4000)
我无法将文本分成多个字段。我需要1个字段中的全文。
任何帮助将不胜感激。
答案 0 :(得分:1)
使用流水线函数:
Oracle设置-测试数据:
CREATE TABLE text_files (
FILE_NAME VARCHAR2(4000 BYTE),
FILE_CONTENT CLOB
);
INSERT INTO text_files VALUES (
'test1',
'ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~
GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~
ST*837*0021*005010X222~
BHT*0019*00*244579*20061015*1023*CH~
NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~
PER*IC*JERRY*TE*3055552222*EX*231~
NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~
HL*1**20*1~
PRV*BI*PXC*203BF0100Y~
NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~
N3*234 SEAWAY ST~
N4*MIAMI*FL*33111~'
);
Oracle设置-流水线功能:
CREATE TYPE line_detail AS OBJECT(
line_no NUMBER(38,0),
line CLOB,
loop VARCHAR2(6)
);
/
CREATE TYPE line_detail_array AS TABLE OF line_detail;
/
CREATE FUNCTION get_Line_Detail(
file_content IN CLOB
) RETURN line_detail_array PIPELINED
AS
p_line CLOB;
p_loop VARCHAR2(6) := '000';
p_substr CHAR(4);
p_line_no PLS_INTEGER := 0;
p_line_start PLS_INTEGER := 1;
p_line_end PLS_INTEGER;
p_line_len PLS_INTEGER;
p_length PLS_INTEGER;
p_nm1 PLS_INTEGER := 0;
p_prv PLS_INTEGER := 0;
BEGIN
IF file_content IS NULL THEN
RETURN;
END IF;
p_length := DBMS_LOB.GETLENGTH( file_content );
LOOP
p_line := EMPTY_CLOB();
p_line_no := p_line_no + 1;
p_substr := DBMS_LOB.SUBSTR( file_content, 4, p_line_start );
IF p_substr = 'PRV*' THEN
p_prv := p_prv + 1;
p_loop := '2010A' || CHR( 64 + p_prv );
ELSIF p_substr = 'NM1*' THEN
p_nm1 := p_nm1 + 1;
p_loop := '1000' || CHR( 64 + p_nm1 );
END IF;
p_line_end := DBMS_LOB.INSTR( file_content, CHR(10), p_line_start );
IF p_line_end = 0 THEN
p_line_end := p_length;
END IF;
LOOP
EXIT WHEN p_line_start > p_line_end;
p_line_len := LEAST( 4000, p_line_end - p_line_start + 1 );
p_line := p_line || DBMS_LOB.SUBSTR( file_content, p_line_len, p_line_start );
p_line_start := p_line_start + p_line_len;
END LOOP;
PIPE ROW( line_detail( p_line_no, p_line, p_loop ) );
EXIT WHEN p_line_end >= p_length;
END LOOP;
END;
/
查询:
SELECT c.*
FROM text_files t
CROSS JOIN TABLE( get_Line_Detail( t.file_content ) ) c;
输出:
LINE_NO | LINE | LOOP ------: | :------------------------------------------------------------------------------------------------------------- | :----- 1 | ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~<br> | 000 2 | GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~<br> | 000 3 | ST*837*0021*005010X222~<br> | 000 4 | BHT*0019*00*244579*20061015*1023*CH~<br> | 000 5 | NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~<br> | 1000A 6 | PER*IC*JERRY*TE*3055552222*EX*231~<br> | 1000A 7 | NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~<br> | 1000B 8 | HL*1**20*1~<br> | 1000B 9 | PRV*BI*PXC*203BF0100Y~<br> | 2010AA 10 | NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~<br> | 1000C 11 | N3*234 SEAWAY ST~<br> | 1000C 12 | N4*MIAMI*FL*33111~ | 1000C
db <>提琴here
答案 1 :(得分:0)
假设文件的任何一行都不会超过4000字节长,那么:
Oracle设置:
CREATE TABLE text_files (
FILE_NAME VARCHAR2(4000 BYTE),
FILE_CONTENT CLOB
);
INSERT INTO text_files VALUES (
'test1',
'ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~
GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~
ST*837*0021*005010X222~
BHT*0019*00*244579*20061015*1023*CH~
NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~
PER*IC*JERRY*TE*3055552222*EX*231~
NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~
HL*1**20*1~
PRV*BI*PXC*203BF0100Y~
NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~
N3*234 SEAWAY ST~
N4*MIAMI*FL*33111~'
);
查询:
WITH rsqfc ( file_name, file_content, line_no, line, line_end, nm1, prv, depth ) AS (
SELECT file_name,
file_content,
1,
CASE
WHEN DBMS_LOB.INSTR( file_content, CHR(10) ) = 0
THEN DBMS_LOB.SUBSTR( file_content, DBMS_LOB.GETLENGTH( file_content ), 1 )
ELSE DBMS_LOB.SUBSTR( file_content, DBMS_LOB.INSTR( file_content, CHR(10) ), 1 )
END,
DBMS_LOB.INSTR( file_content, CHR(10) ),
CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'NM1*' THEN 1 ELSE 0 END,
CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'PRV*' THEN 1 ELSE 0 END,
CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE 0 END
FROM text_files
UNION ALL
SELECT file_name,
file_content,
line_no + 1,
CASE
WHEN DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ) = 0
THEN DBMS_LOB.SUBSTR(
file_content,
DBMS_LOB.GETLENGTH( file_content ) - line_end,
line_end + 1
)
ELSE DBMS_LOB.SUBSTR(
file_content,
DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ) - line_end,
line_end + 1
)
END,
DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ),
CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'NM1*' THEN nm1 + 1 ELSE nm1 END,
CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'PRV*' THEN prv + 1 ELSE prv END,
CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE depth END
FROM rsqfc
WHERE line_end > 0
)
SELECT line_no,
CASE depth
WHEN 2 THEN '2010A' || CHR( 64 + prv )
WHEN 1 THEN '1000' || CHR( 64 + nm1 )
WHEN 0 THEN '000'
END AS "LOOP",
line
FROM rsqfc;
输出:
LINE_NO | LOOP | LINE ------: | :----- | :------------------------------------------------------------------------------------------------------------- 1 | 000 | ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~<br> 2 | 000 | GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~<br> 3 | 000 | ST*837*0021*005010X222~<br> 4 | 000 | BHT*0019*00*244579*20061015*1023*CH~<br> 5 | 1000A | NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~<br> 6 | 1000A | PER*IC*JERRY*TE*3055552222*EX*231~<br> 7 | 1000B | NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~<br> 8 | 1000B | HL*1**20*1~<br> 9 | 2010AA | PRV*BI*PXC*203BF0100Y~<br> 10 | 1000C | NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~<br> 11 | 1000C | N3*234 SEAWAY ST~<br> 12 | 1000C | N4*MIAMI*FL*33111~
db <>提琴here
答案 2 :(得分:-2)
然后是一个存根。
create or replace function get_depth(sFileName in VARCHAR2)
return varchar2 as
sBuffer varchar2(32000);
iResult integer := 0;
begin
--
for rec in (select file_content from "text_files" where file_name = sFileName) loop
sBuffer := dbms_lob.substr(rec.file_content,32000);
exit;
end loop;
if sBuffer is not null then
-- Your regexp on sBuffert
end if;
return iResult;
end get_depth;
然后选择:
select get_depth('myfile') depth from dual;