我正在尝试使用THIS AskTom帖子中的代码来分离DBF文件,但是我不知道我什至要弄清楚我要分离的文件是如何格式化的?最初的AskTom答案是如何产生的?我如何找出dbf标头。我怎么知道数据存储在dbf文件中的什么位置,甚至如何提取数据?
我的目标是使用提供的代码,并像其他人一样提出解决方案,但我只停留在第一部分。
在研究此问题时,我发现有许多系统使用DBF文件。 ACCESS和MS Excel能够导入DBF文件,但是当我尝试从Oracle Express安装中导入DBF文件时,这些功能都无法正常工作。在ACCESS中,可以从几种不同的dbf文件格式中进行选择,包括 dBASE III,dBASE IV,dBASE 5和dBASE 7 。我不知道Oracle是否甚至是这些格式中的任何一种,希望我知道如何找出这些信息。就我列出的格式而言,Fox Pro似乎是dbf文件的主要用户。
AskTom链接中的代码很长,并且不一定与此问题相关,只是为了让您知道我来自何处,无论如何我都将其包括在内。我可以运行代码,但是它不起作用。我想修复它,但是我需要了解Oracle DBF文件,或者也许我还缺少其他东西?
How to Read a DBase File in Java or Oracle PL/SQL
What is the best opensource dbf driver for java?
create or replace package dbase_fox as
-- procedure to a load a table with records
-- from a DBASE file.
--
-- Uses a BFILE to read binary data and dbms_sql
-- to dynamically insert into any table you
-- have insert on.
--
-- p_filename is the name of a file in that directory
-- will be the name of the DBASE file
-- p_colnames is an optional list of comma separated
-- column names. If not supplied, this pkg
-- assumes the column names in the DBASE file
-- are the same as the column names in the
-- table
-- p_rownum boolean that activates an autonum
-- functionality such that a sequential
-- numbered virtual column is created
/* EXAMPLE USAGE
| BEGIN
|
| dbase_fox.showtable(p_filename => 'CONTROL'
| , p_colnames => null
| , p_rownum => false);
|
| END;
*/
procedure loadtable(p_filename in varchar2
, p_colnames in varchar2 default null
, p_rownum in boolean default false);
-- procedure to print (and not insert) what we find in
-- the DBASE files (not the data, just the info
-- from the dbase headers....)
--
-- p_filename is the name of a file in that directory
-- will be the name of the DBASE file
-- p_colnames is an optional list of comma separated
-- column names. If not supplied, this pkg
-- assumes the column names in the DBASE file
-- are the same as the column names in the
-- table
-- p_rownum boolean that activates an autonum
-- functionality such that a sequential
-- numbered virtual column is created
procedure showtable(p_filename in varchar2
, p_colnames in varchar2 default null
, p_rownum in boolean default false);
end;
/
/* Package BODY */
create or replace package body dbase_fox as
PREFIX constant varchar2(32) default 'stage_';
CR constant varchar(2) default chr(13)||chr(10);
MEMODTYPE constant varchar2(32) default 'varchar2(4000)';
ROWNUMNAME constant varchar2(32) default '"ROWNUM"';
FRAMESIZE constant integer default 1000;
addrownum boolean := false;
colnames varchar2(255) := '';
filename varchar2(32) := '';
dbfbfile bfile := null;
fptbfile bfile := null;
DBF_HEADER_SIZE constant number default 32;
type dbf_header_type is record (
version varchar2(25) -- dBASE version number
,year int -- 1 byte int year, add to 1900
,month int -- 1 byte month
,day int -- 1 byte day
,no_records int -- number of records in file, 4 byte int
,hdr_len int -- length of header, 2 byte int
,rec_len int -- number of bytes in record, 2 byte int
,no_fields int -- number of fields
);
dbf_header dbf_header_type := null;
subtype dbf_header_data is raw(32);
DBF_FIELD_DESCRIPTOR_SIZE constant number default 32;
type dbf_field_descriptor_type is record (
name varchar2(11)
,type char(1)
,length int -- 1 byte length
,decimals int -- 1 byte scale
);
type dbf_field_descriptor_array is table of dbf_field_descriptor_type index by binary_integer;
subtype dbf_field_descriptor_data is raw(32);
dbf_field_descriptor dbf_field_descriptor_array;
type rowarray_type is table of dbms_sql.varchar2_table index by binary_integer;
rowarray rowarray_type;
subtype raw_type is raw(4000);
type rawarray_type is table of raw_type index by binary_integer;
rawarray rawarray_type;
loadcursor binary_integer;
mblocksize number := 0;
procedure get_header is
l_data dbf_header_data;
begin
l_data := dbms_lob.substr(dbfbfile, DBF_HEADER_SIZE, 1);
dbf_header.version := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 1, 1));
dbf_header.year := 1900 + utl_raw.cast_to_binary_integer(utl_raw.substr( l_data, 2, 1));
dbf_header.month := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 3, 1));
dbf_header.day := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 4, 1));
dbf_header.no_records := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 5, 4),2);
dbf_header.hdr_len := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 9, 2),2);
dbf_header.rec_len := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 11, 2),2);
dbf_header.no_fields := trunc((dbf_header.hdr_len - DBF_HEADER_SIZE) / DBF_FIELD_DESCRIPTOR_SIZE);
end;
procedure get_header_fields is
l_data dbf_field_descriptor_data;
begin
for i in 1 .. dbf_header.no_fields loop
l_data := dbms_lob.substr(dbfbfile, DBF_FIELD_DESCRIPTOR_SIZE, 1+DBF_HEADER_SIZE+(i-1)*DBF_FIELD_DESCRIPTOR_SIZE); -- starting past the header
dbf_field_descriptor(i).name := rtrim(utl_raw.cast_to_varchar2(utl_raw.substr(l_data,1,11)),chr(0));
dbf_field_descriptor(i).type := utl_raw.cast_to_varchar2(utl_raw.substr(l_data, 12, 1));
dbf_field_descriptor(i).length := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data, 17, 1));
dbf_field_descriptor(i).decimals := utl_raw.cast_to_binary_integer(utl_raw.substr(l_data,18,1));
end loop;
end;
procedure show_field_header_columns is
begin
dbms_output.put_line(CR||'Num'
||chr(9)||'Name '
||chr(9)||'Type'
||chr(9)||'Length'
||chr(9)||'Decimals');
dbms_output.put_line('==='
||chr(9)||'==== '
||chr(9)||'===='
||chr(9)||'======'
||chr(9)||'========');
end;
procedure show_header(p_file_length in integer) is
begin
dbms_output.put_line(chr(9)||dbf_header.version
||chr(9)||dbf_header.year
||chr(9)||dbf_header.month
||chr(9)||dbf_header.day
||chr(9)||dbf_header.no_records
||chr(9)||dbf_header.hdr_len
||chr(9)||dbf_header.rec_len
||chr(9)||dbf_header.no_fields
||chr(9)||p_file_length);
end;
procedure show_fields is
begin
for i in dbf_field_descriptor.first .. dbf_field_descriptor.last loop
dbms_output.put_line(i
||chr(9)||rpad(dbf_field_descriptor(i).name,11,' ')
||chr(9)||dbf_field_descriptor(i).type
||chr(9)||dbf_field_descriptor(i).length
||chr(9)||dbf_field_descriptor(i).decimals);
end loop;
end;
function build_insert return varchar2 is
l_statement long;
begin
l_statement := 'insert into ' || PREFIX || filename || ' (';
if colnames is not null then
l_statement := l_statement || colnames;
else
for i in dbf_field_descriptor.first .. dbf_field_descriptor.last loop
if i <> 1 then
l_statement := l_statement || ',';
end if;
l_statement := l_statement || '"'|| dbf_field_descriptor(i).name || '"';
end loop;
if addrownum then
l_statement := l_statement || ',' || ROWNUMNAME;
end if;
end if;
l_statement := l_statement || ') values (';
for i in dbf_field_descriptor.first .. dbf_field_descriptor.last loop
if i <> 1 then
l_statement := l_statement || ',';
end if;
if dbf_field_descriptor(i).type = 'D' then
l_statement := l_statement || 'to_date(:bv' || i || ',''yyyymmdd'' )';
else
l_statement := l_statement || ':bv' || i;
end if;
end loop;
if addrownum then
l_statement := l_statement || ',:bv' || (dbf_field_descriptor.last + 1);
end if;
l_statement := l_statement || ')';
return l_statement;
end;
function build_create return varchar2 is
l_statement long;
begin
l_statement := 'create table ' || PREFIX || filename || ' (';
for i in dbf_field_descriptor.first .. dbf_field_descriptor.last loop
l_statement := l_statement || CR;
if i <> dbf_field_descriptor.first then
l_statement := l_statement ||',';
else
l_statement := l_statement ||' ';
end if;
l_statement := l_statement || '"'|| dbf_field_descriptor(i).name || '"'||chr(9);
if dbf_field_descriptor(i).type = 'D' then
l_statement := l_statement || 'date';
elsif dbf_field_descriptor(i).type = 'F' then
l_statement := l_statement || 'float';
elsif dbf_field_descriptor(i).type = 'N' then
if dbf_field_descriptor(i).decimals > 0 then
l_statement := l_statement || 'number('||dbf_field_descriptor(i).length||','|| dbf_field_descriptor(i).decimals || ')';
else
l_statement := l_statement || 'number('||dbf_field_descriptor(i).length||')';
end if;
elsif dbf_field_descriptor(i).type = 'M' then
l_statement := l_statement || MEMODTYPE;
else
l_statement := l_statement || 'varchar2(' || dbf_field_descriptor(i).length || ')';
end if;
end loop;
if addrownum then
l_statement := l_statement || CR || ',' || ROWNUMNAME || chr(9) || 'number';
end if;
l_statement := l_statement ||CR||');'||CR||'/';
return l_statement;
end;
procedure show_header_columns is
begin
dbms_output.put_line(CR||'DBASE File'
||chr(9)||'Version'
||chr(9)||'Year'
||chr(9)||'Month'
||chr(9)||'Day'
||chr(9)||'#Recs'
||chr(9)||'HdrLen'
||chr(9)||'RecLen'
||chr(9)||'#Fields'
||chr(9)||'Size');
dbms_output.put_line('=========='
||chr(9)||'======='
||chr(9)||'===='
||chr(9)||'====='
||chr(9)||'==='
||chr(9)||'====='
||chr(9)||'======'
||chr(9)||'======'
||chr(9)||'======='
||chr(9)||'====');
end;
procedure loadtablerecord(i in number) is
l_n number;
l_fblock number;
l_data raw_type;
begin
l_data := dbms_lob.substr(dbfbfile,dbf_header.rec_len,2+DBF_HEADER_SIZE+dbf_header.no_fields*DBF_FIELD_DESCRIPTOR_SIZE+(i-1)*dbf_header.rec_len); -- starting past the header and field descriptors
rawarray(0) := utl_raw.substr(l_data, 1, 1);
l_n := 2;
for j in 1 .. dbf_header.no_fields loop
rawarray(j) := utl_raw.substr(l_data,l_n,dbf_field_descriptor(j).length);
if dbf_field_descriptor(j).type = 'F' and rawarray(j) = '.' then
rawarray(j) := null;
elsif dbf_field_descriptor(j).type = 'M' then
if dbms_lob.isopen(fptbfile) != 0 then
l_fblock := nvl(utl_raw.cast_to_binary_integer(dbms_lob.substr(fptbfile, 4, to_number(trim(utl_raw.cast_to_varchar2(rawarray(j))))*mblocksize+5)),0);
rawarray(j) := dbms_lob.substr(fptbfile, l_fblock, to_number(trim(utl_raw.cast_to_varchar2(rawarray(j))))*mblocksize+9);
else
dbms_output.put_line(filename || '.fpt not found');
end if;
end if;
l_n := l_n + dbf_field_descriptor(j).length;
end loop;
end;
procedure loadtablearray(p_cntarr in int) is
l_bulkcnt number;
begin
for j in 1 .. dbf_header.no_fields loop
dbms_sql.bind_array(loadcursor, ':bv'||j, rowarray(j),1,p_cntarr);
end loop;
if addrownum then
dbms_sql.bind_array(loadcursor, ':bv'||(dbf_header.no_fields+1), rowarray(dbf_header.no_fields+1),1,p_cntarr);
end if;
begin
l_bulkcnt := dbms_sql.execute(loadcursor);
--dbms_output.put_line('Bulk insert count ' || l_bulkcnt);
exception
when others then
dbms_output.put_line('Bulk insert failed ' || sqlerrm);
dbms_output.put_line(build_insert);
end;
end;
procedure loadtablebulk is
l_cntrow int default 0;
l_cntarr int default 0;
begin
loadcursor := dbms_sql.open_cursor;
dbms_sql.parse(loadcursor, build_insert, dbms_sql.native);
for i in 1 .. dbf_header.no_records loop
loadtablerecord(i);
if utl_raw.cast_to_varchar2(rawarray(0)) <> '*' then
l_cntarr := l_cntarr + 1;
for j in 1 .. dbf_header.no_fields loop
rowarray(j)(l_cntarr) := trim(utl_raw.cast_to_varchar2(rawarray(j)));
end loop;
if addrownum then
l_cntrow := l_cntrow + 1;
rowarray((dbf_header.no_fields+1))(l_cntarr) := l_cntrow;
end if;
if l_cntarr >= FRAMESIZE then
loadtablearray(l_cntarr);
l_cntarr := 0;
end if;
end if;
end loop;
if l_cntarr > 0 then
loadtablearray(l_cntarr);
end if;
dbms_sql.close_cursor(loadcursor);
exception
when others then
if dbms_sql.is_open(loadcursor) then
dbms_sql.close_cursor(loadcursor);
end if;
dbms_output.put_line('loadtable failed for ' || filename);
dbms_output.put_line('insert ' || build_insert);
end;
procedure open_dbf is
begin
dbfbfile := bfilename('FILE_GET_DIR', filename || '.dbf');
dbms_lob.fileopen(dbfbfile);
end;
procedure open_fpt is
begin
fptbfile := bfilename('FILE_GET_DIR', filename || '.fpt');
if dbms_lob.fileexists(fptbfile) != 0 then
dbms_lob.fileopen(fptbfile);
end if;
end;
procedure close_dbf is
begin
if dbms_lob.isopen(dbfbfile) > 0 then
dbms_lob.fileclose(dbfbfile);
end if;
end;
procedure close_fpt is
begin
if dbms_lob.isopen(fptbfile) > 0 then
dbms_lob.fileclose(fptbfile);
end if;
end;
procedure initialize is
l_empty_dbf_field_descriptor dbf_field_descriptor_array;
l_empty_rowarray rowarray_type;
l_empty_rawarray rawarray_type;
begin
dbfbfile := null;
fptbfile := null;
dbf_field_descriptor := l_empty_dbf_field_descriptor;
dbf_header := null;
rowarray := l_empty_rowarray;
rawarray := l_empty_rawarray;
loadcursor := 0;
mblocksize := 0;
end;
procedure showtable(p_filename in varchar2, p_colnames in varchar2 default null, p_rownum in boolean default false) is
errorAtLine NUMBER := 0;
begin
filename := p_filename;
addrownum := p_rownum;
colnames := p_colnames;
initialize;
errorAtLine := 1;
open_dbf;
errorAtLine := 2;
get_header;
errorAtLine := 3;
get_header_fields;
errorAtLine := 4;
show_header_columns;
errorAtLine := 5;
dbms_output.put(filename || '.dbf');
errorAtLine := 6;
show_header(dbms_lob.getlength(dbfbfile));
errorAtLine := 7;
show_field_header_columns;
errorAtLine := 8;
show_fields;
errorAtLine := 9;
dbms_output.put_line(CR||'Insert statement:');
dbms_output.put_line(build_insert);
dbms_output.put_line(CR||'Create statement:');
dbms_output.put_line(build_create);
close_dbf;
exception
when others then
close_dbf;
dbms_output.put_line('Error At: ' || errorAtLine);
raise;
end;
procedure loadtable(p_filename in varchar2, p_colnames in varchar2 default null, p_rownum in boolean default false) is
begin
filename := p_filename;
addrownum := p_rownum;
colnames := p_colnames;
initialize;
open_dbf;
open_fpt;
if dbms_lob.isopen(fptbfile) > 0 then
mblocksize := utl_raw.cast_to_binary_integer(dbms_lob.substr(fptbfile, 2, 7));
else
mblocksize := 0;
end if;
get_header;
get_header_fields;
loadtablebulk;
close_dbf;
close_fpt;
exception
when others then
close_dbf;
close_fpt;
raise;
end;
end;
答案 0 :(得分:2)
我遇到了类似的问题,这就是我的解决方法。
TL; DR:您需要使用Apache Tika来解析DBase文件。它将内容转换为XHTML表并将其作为java.lang.String
返回,您可以通过DOM或SAX解析器对其进行解析,以获取所需格式的数据。以下是一些示例:https://tika.apache.org/1.20/examples.html
首先,将以下Maven依赖项添加到您的POM:
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.21</version>
</dependency>
然后初始化解析器:
Parser parser = new DBFParser(); //Alternatively, you can use AutoDetectParser
ContentHandler handler = new BodyContentHandler(new ToXMLContentHandler()); //This is tells the parser to produce an XHTML as an output.
parser.parse(dbaseInputStream, handler, new Metadata(), new ParseContext()); // Here, dbaseInputStream is a FileInputStream object for the DBase file.
String dbaseAsXhtml = handler.toString(); //This will have the content in XHTML format
现在,要将数据转换为更方便的格式(在本例中为CSV),我做了以下操作:
首先,将整个String转换为DOM对象:
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document xhtmlDoc= builder.parse(new InputSource(new StringReader(xmlString.trim().replaceAll("\t", "")))); //I'm trimming out the tabs and whitespaces here, so that I don't have to dealt with them later
现在,获取标题:
XPath xPath = XPathFactory.newInstance().newXPath();
NodeList tableHeader = (NodeList)xPath.evaluate("//table/thead/th", xhtmlDoc, XPathConstants.NODESET);
String [] headers = new String[tableHeader.getLength()];
for(int i = 0; i < tableHeader.getLength(); i++) {
headers[i] = tableHeader.item(i).getTextContent();
}
然后记录:
XPath xPath = XPathFactory.newInstance().newXPath();
NodeList tableRecords = (NodeList)xPath.evaluate("//table/tbody/tr", xhtmlDoc, XPathConstants.NODESET);
List<String[]> records = new ArrayList<String[]>(tableRecords.getLength());
for(int i = 0; i < tableRecords.getLength(); i++) {
NodeList recordNodes = tableRecords.item(i).getChildNodes();
String[] record = new String[recordNodes.getLength()];
for(int j = 0; j < recordNodes.getLength(); j++)
record[j] = recordNodes.item(j).getTextContent();
records.add(record);
}
最后,我们将它们放在一起以形成CSV:
StringBuilder dbaseCsvStringBuilder = new StringBuilder(String.join(",", headers) + "\n");
for(String[] record : records)
dbaseCsvStringBuilder.append(String.join(",", record) + "\n");
String csvString = dbaseCsvStringBuilder.toString();