将pdf ocr数据从文件插入数据库

时间:2019-03-26 18:55:56

标签: java oracle ocr

我已经使用 PDFBox 开源库完成了 PDF 文件的 OCR,目前是将提取出的数据写入文本文件,但我想把所有数据插入到数据库表中。

/**
 * Extracts the text of a fixed PDF file with PDFBox and writes it to a text file.
 *
 * <p>A JDBC connection and statement are opened first, but nothing is written to the
 * database yet: the sample INSERT is still commented out.
 */
public class Pdfocr {

    private static final String DB_CONNECTION = "jdbc:oracle:thin:@host:port:db";
    private static final String DB_USER = "user";
    private static final String DB_PASSWORD = "pwd";

    /**
     * Opens the database connection, then dumps the text of every page of the input PDF
     * into the output text file.
     *
     * <p>Any failure is caught and its stack trace printed; the method never rethrows.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File input = new File("C:/ocr/MTBC lockbox deposits Feb 01, 2019 ($5,185.85).pdf");
        File output = new File("C:/ocr/SampleText.txt");

        // try-with-resources guarantees that the connection, statement, document and writer
        // are all closed even when an exception is thrown part-way through.
        try (Connection dbConnection = DriverManager.getConnection(DB_CONNECTION, DB_USER, DB_PASSWORD)) {
            System.out.println("Connection");

            try (Statement statement = dbConnection.createStatement();
                 PDDocument pd = PDDocument.load(input)) {

                // Sample of the INSERT the extracted rows are meant to feed (not executed yet):
                //statement.execute("INSERT INTO xx_lockbox_receipts(BATCH, TRX, CHECK_DATE, LOCKBOX_NO, ACCT_NO, CHECK_NO, AMOUNT) VALUES (1,1,SYSDATE,123,123,321,10)");

                // Extract every page, from the first to the last.
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setStartPage(1);
                stripper.setEndPage(pd.getNumberOfPages());

                // The output file is opened only once the PDF has loaded and the stripper is
                // ready, so an earlier failure does not create or truncate it.
                try (BufferedWriter wr =
                         new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)))) {
                    stripper.writeText(pd, wr);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

上面的代码的注释中提到了SQL INSERT语句;提取的文本文件的输出示例如下:

Batch: 1, Tran: 1, Date: 20190201, Amount: 73.88, Lockbox#: 83495, Acc#: 1200653200, Check#: 1242
Batch: 1, Tran: 1, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 2, Date: 20190201, Amount: 3440.91, Lockbox#: 83495, Acc#: 5020607192, Check#: 23322
Batch: 1, Tran: 2, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 2, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 2, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 3, Date: 20190201, Amount: 1671.06, Lockbox#: 83495, Acc#: 3601085602, Check#: 8723
Batch: 1, Tran: 3, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 3, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -
Batch: 1, Tran: 3, Date: 20190201, Amount: 0.00, Lockbox#: 83495, Acc#: -

0 个答案:

没有答案