java.util.concurrent.ExecutionException:java.lang.Error:无效的内存访问

时间:2018-06-27 10:30:19

标签: java windows tesseract

请注意,我正在使用tes4j编写Java程序,并且能够提取tiff文件并将其保存为pdf,但是很多时候我遇到此错误。我正在使用可调用批处理运行文件,我正在处理5个文件并处理它们,在此期间我收到此错误。我正在使用test4j作为主要依赖。

错误描述

java.util.concurrent.ExecutionException: java.lang.Error: Invalid memory access
        at java.util.concurrent.FutureTask.report(Unknown Source)
        at java.util.concurrent.FutureTask.get(Unknown Source)
        at com.mkyong.listener.SerachablePDFConversionService.processAllFiles(SerachablePDFConversionService.java:197)
        at com.mkyong.listener.SerachablePDFConversionService.run(SerachablePDFConversionService.java:107)
        at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.Error: Invalid memory access
        at com.sun.jna.Native.invokeInt(Native Method)
        at com.sun.jna.Function.invoke(Function.java:419)
        at com.sun.jna.Function.invoke(Function.java:354)
        at com.sun.jna.Library$Handler.invoke(Library.java:244)
        at com.sun.proxy.$Proxy0.gsapi_init_with_args(Unknown Source)
        at org.ghost4j.Ghostscript.initialize(Ghostscript.java:350)
        at com.mkyong.listener.SerachablePDFConversionService.convertPDFToTiff(SerachablePDFConversionService.java:137)
        at com.mkyong.listener.SerachablePDFConversionService$1.call(SerachablePDFConversionService.java:213)
        at com.mkyong.listener.SerachablePDFConversionService$1.call(SerachablePDFConversionService.java:1)
        at java.util.concurrent.FutureTask.run(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)

代码

    package com.apache.pdfbox.ocr.tesseract;

import java.io.File;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.ghost4j.Ghostscript;
import org.ghost4j.GhostscriptException;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

public class SerachablePDFConversionService {

    private static final String OCR_INPUT_FOLDER = System.getenv("OCR_INPUT");
    private static final String OCR_OUTPUT_FOLDER = System.getenv("OCR_OUTPUT");
    private static final String OCR_SUCCESS_FOLDER = System.getenv("OCR_SUCCESS");
    private static final String TESSDATA_PREFIX = System.getenv("TESSDATA_PREFIX");

    public static void main(String[] args) {
        File inputFiles[] = new File(OCR_INPUT_FOLDER).listFiles();
        String tiffFileName = "";
        String inputFileName = "";
        try {
            for (File inputFile : inputFiles) {
                inputFileName = inputFile.getName();
                System.out.println("Input File Name is [" + inputFileName + "]");
                if (inputFileName != null && inputFileName.length() > 0
                        && inputFileName.toLowerCase().indexOf(".pdf") > 0) {
                    tiffFileName = inputFile.getName().replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
                    System.out.println("Tiff File Name is [" + tiffFileName + "]");
                    System.out.println("Start Time" + new Date());
                    if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
                        System.out.println("PDF to tiff conversion is successful");
                        if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
                            System.out.println("Searchable PDF creation is successful");
                            Files.move(
                                    FileSystems.getDefault()
                                            .getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
                                    FileSystems.getDefault()
                                            .getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
                                    StandardCopyOption.REPLACE_EXISTING);
                            System.out.println("End Time" + new Date());
                        } else {
                            System.out.println("Searchable PDF creation is failed");
                        }
                    } else {
                        System.out.println("PDF to tiff conversion is failed");
                    }
                } else {

                }
            }
        } catch (Exception e) {
            System.out.println("ERROR in Main Method: " + e.getMessage());
            System.err.println(e.getMessage());
        }
    }

    public static String covertToTiffAndOCR(ArrayList<String> inputFiles) throws Exception {
        String success = "false";
        for (String inputFileName : inputFiles) {
            System.out.println("File Name " + inputFileName);
            String tiffFileName = "";
            if (inputFileName != null && inputFileName.length() > 0
                    && inputFileName.toLowerCase().indexOf(".pdf") > 0) {
                tiffFileName = inputFileName.replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
                System.out.println("Tiff File Name is [" + tiffFileName + "]");
                if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
                    System.out.println("PDF to tiff conversion is successful");
                    if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
                        System.out.println("Searchable PDF creation is successful");
                        Files.move(FileSystems.getDefault().getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
                                FileSystems.getDefault().getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
                                StandardCopyOption.REPLACE_EXISTING);
                    } else {
                        System.out.println("Searchable PDF creation is failed");
                    }
                } else {
                    System.out.println("PDF to tiff conversion is failed");
                }
            } else {

            }
        }
        success = "true";
        return success;
    }

    public static String convertPDFToTiff(String pdfFile, String tiffFile) {
        System.out.println("Called=========convertPDFToTiff " + pdfFile + "tiffFile " + tiffFile);
        String opSuccess = "false";
        Ghostscript gs = Ghostscript.getInstance();
        try {
            synchronized (gs) {
                String[] gsArgs = new String[9];
                gsArgs[0] = "-gswin64";
                gsArgs[1] = "-q";
                gsArgs[2] = "-r300x300";
                gsArgs[3] = "-dNOPAUSE";
                gsArgs[4] = "-dBATCH";
                // gsArgs[5] = "-sDEVICE=tiffg4";
                // gsArgs[5] = "-sDEVICE=tiffgray";
                gsArgs[5] = "-sDEVICE=tiff24nc";
                gsArgs[6] = "-sCompression=lzw";
                gsArgs[7] = "-sOutputFile=" + OCR_OUTPUT_FOLDER + File.separator + tiffFile;
                gsArgs[8] = OCR_INPUT_FOLDER + File.separator + pdfFile;
                // execute and exit interpreter

                gs.initialize(gsArgs);
                gs.exit();
                opSuccess = "true";

            }
        } catch (GhostscriptException e) {
            opSuccess = "false";
            System.out.println("ERROR: " + e.getMessage());
        } catch (Exception e) {
            opSuccess = "false";
            System.out.println("ERROR: " + e.getMessage());
        } finally {
            try {
                Ghostscript.deleteInstance();
            } catch (GhostscriptException e) {
                opSuccess = "false";
                System.out.println("ERROR: " + e.getMessage());
            }
        }
        return opSuccess;
    }

    public synchronized static String doOCR(String pdfFile, String tiffFile) {
        System.out.println("Called======doOCR " + pdfFile + " tiffFile " + tiffFile);
        String opSuccess = "false";
        ITesseract instance = new Tesseract();
        List<RenderedFormat> formats = new ArrayList<RenderedFormat>();
        formats.add(RenderedFormat.PDF);

        try {
            instance.setDatapath(TESSDATA_PREFIX);
            instance.setLanguage("eng+ara");
            instance.setOcrEngineMode(1);
            instance.setPageSegMode(3);
            instance.createDocuments(OCR_OUTPUT_FOLDER + File.separator + tiffFile,
                    OCR_OUTPUT_FOLDER + File.separator + pdfFile.replaceAll(".pdf", "").replaceAll(".PDF", ""),
                    formats);
            opSuccess = "true";
        } catch (TesseractException e) {
            opSuccess = "false";
            System.out.println("OCR ERROR: " + e.getMessage());
            System.err.println(e.getMessage());
        } catch (Exception e) {
            opSuccess = "false";
            System.out.println("OCR ERROR: " + e.getMessage());
            System.err.println(e.getMessage());
        }
        return opSuccess;
    }

    public void processAllFiles(ArrayList<String> ipFiles) throws Exception {

        java.util.List<Callable<String>> tasks = new ArrayList<Callable<String>>(ipFiles.size());
        for (String ipFileName : ipFiles) {
            System.out.println("11111111" + ipFileName);
            tasks.add(processPartTask1(ipFileName));
        }
        ExecutorService es = Executors.newFixedThreadPool(ipFiles.size());
        java.util.List<Future<String>> results = es.invokeAll(tasks);
        for (Future<String> result : results)
            System.out.println(result.get());
        es.shutdown();
    }

    public Callable<String> processPartTask1(String ipFileName) {
        return new Callable<String>() {
            public String call() throws Exception {
                System.out.println("22222222" + ipFileName);
                String tiffFileName = "";
                String inputFileName = ipFileName;
                String returnvalue = "false";
                if (inputFileName != null && inputFileName.length() > 0
                        && inputFileName.toLowerCase().indexOf(".pdf") > 0) {
                    tiffFileName = ipFileName.replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
                }
                if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
                    System.out.println("PDF to tiff conversion is successful");
                    if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
                        System.out.println("Searchable PDF creation is successful");
                        Files.move(FileSystems.getDefault().getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
                                FileSystems.getDefault().getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
                                StandardCopyOption.REPLACE_EXISTING);
                        System.out.println("End Time" + new Date());
                        returnvalue = "true " + inputFileName;
                    } else {
                        System.out.println("Searchable PDF creation is failed");
                    }
                }

                return returnvalue;// this needs to be changed
            }
        };
    }

    public void processPDFFiles() throws Exception {
        File inputFiles[] = new File(OCR_INPUT_FOLDER).listFiles();
        String inputFileName = "";
        ArrayList<String> files = new ArrayList<String>();
        try {
            for (File inputFile : inputFiles) {
                inputFileName = inputFile.getName();
                files.add(inputFileName);
            }
        } catch (Exception e) {
        }
        SerachablePDFConversionService serachablePDFConversionService = new SerachablePDFConversionService();
        serachablePDFConversionService.processAllFiles(files);
    }

}

0 个答案:

没有答案