请注意,我正在使用tes4j编写Java程序,并且能够提取tiff文件并将其保存为pdf,但是很多时候我遇到此错误。我正在使用可调用批处理运行文件,我正在处理5个文件并处理它们,在此期间我收到此错误。我正在使用test4j作为主要依赖。
错误描述
java.util.concurrent.ExecutionException: java.lang.Error: Invalid memory access
at java.util.concurrent.FutureTask.report(Unknown Source)
at java.util.concurrent.FutureTask.get(Unknown Source)
at com.mkyong.listener.SerachablePDFConversionService.processAllFiles(SerachablePDFConversionService.java:197)
at com.mkyong.listener.SerachablePDFConversionService.run(SerachablePDFConversionService.java:107)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.Error: Invalid memory access
at com.sun.jna.Native.invokeInt(Native Method)
at com.sun.jna.Function.invoke(Function.java:419)
at com.sun.jna.Function.invoke(Function.java:354)
at com.sun.jna.Library$Handler.invoke(Library.java:244)
at com.sun.proxy.$Proxy0.gsapi_init_with_args(Unknown Source)
at org.ghost4j.Ghostscript.initialize(Ghostscript.java:350)
at com.mkyong.listener.SerachablePDFConversionService.convertPDFToTiff(SerachablePDFConversionService.java:137)
at com.mkyong.listener.SerachablePDFConversionService$1.call(SerachablePDFConversionService.java:213)
at com.mkyong.listener.SerachablePDFConversionService$1.call(SerachablePDFConversionService.java:1)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
代码
package com.apache.pdfbox.ocr.tesseract;
import java.io.File;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.ghost4j.Ghostscript;
import org.ghost4j.GhostscriptException;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
public class SerachablePDFConversionService {
private static final String OCR_INPUT_FOLDER = System.getenv("OCR_INPUT");
private static final String OCR_OUTPUT_FOLDER = System.getenv("OCR_OUTPUT");
private static final String OCR_SUCCESS_FOLDER = System.getenv("OCR_SUCCESS");
private static final String TESSDATA_PREFIX = System.getenv("TESSDATA_PREFIX");
public static void main(String[] args) {
File inputFiles[] = new File(OCR_INPUT_FOLDER).listFiles();
String tiffFileName = "";
String inputFileName = "";
try {
for (File inputFile : inputFiles) {
inputFileName = inputFile.getName();
System.out.println("Input File Name is [" + inputFileName + "]");
if (inputFileName != null && inputFileName.length() > 0
&& inputFileName.toLowerCase().indexOf(".pdf") > 0) {
tiffFileName = inputFile.getName().replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
System.out.println("Tiff File Name is [" + tiffFileName + "]");
System.out.println("Start Time" + new Date());
if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
System.out.println("PDF to tiff conversion is successful");
if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
System.out.println("Searchable PDF creation is successful");
Files.move(
FileSystems.getDefault()
.getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
FileSystems.getDefault()
.getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
StandardCopyOption.REPLACE_EXISTING);
System.out.println("End Time" + new Date());
} else {
System.out.println("Searchable PDF creation is failed");
}
} else {
System.out.println("PDF to tiff conversion is failed");
}
} else {
}
}
} catch (Exception e) {
System.out.println("ERROR in Main Method: " + e.getMessage());
System.err.println(e.getMessage());
}
}
public static String covertToTiffAndOCR(ArrayList<String> inputFiles) throws Exception {
String success = "false";
for (String inputFileName : inputFiles) {
System.out.println("File Name " + inputFileName);
String tiffFileName = "";
if (inputFileName != null && inputFileName.length() > 0
&& inputFileName.toLowerCase().indexOf(".pdf") > 0) {
tiffFileName = inputFileName.replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
System.out.println("Tiff File Name is [" + tiffFileName + "]");
if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
System.out.println("PDF to tiff conversion is successful");
if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
System.out.println("Searchable PDF creation is successful");
Files.move(FileSystems.getDefault().getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
FileSystems.getDefault().getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
StandardCopyOption.REPLACE_EXISTING);
} else {
System.out.println("Searchable PDF creation is failed");
}
} else {
System.out.println("PDF to tiff conversion is failed");
}
} else {
}
}
success = "true";
return success;
}
public static String convertPDFToTiff(String pdfFile, String tiffFile) {
System.out.println("Called=========convertPDFToTiff " + pdfFile + "tiffFile " + tiffFile);
String opSuccess = "false";
Ghostscript gs = Ghostscript.getInstance();
try {
synchronized (gs) {
String[] gsArgs = new String[9];
gsArgs[0] = "-gswin64";
gsArgs[1] = "-q";
gsArgs[2] = "-r300x300";
gsArgs[3] = "-dNOPAUSE";
gsArgs[4] = "-dBATCH";
// gsArgs[5] = "-sDEVICE=tiffg4";
// gsArgs[5] = "-sDEVICE=tiffgray";
gsArgs[5] = "-sDEVICE=tiff24nc";
gsArgs[6] = "-sCompression=lzw";
gsArgs[7] = "-sOutputFile=" + OCR_OUTPUT_FOLDER + File.separator + tiffFile;
gsArgs[8] = OCR_INPUT_FOLDER + File.separator + pdfFile;
// execute and exit interpreter
gs.initialize(gsArgs);
gs.exit();
opSuccess = "true";
}
} catch (GhostscriptException e) {
opSuccess = "false";
System.out.println("ERROR: " + e.getMessage());
} catch (Exception e) {
opSuccess = "false";
System.out.println("ERROR: " + e.getMessage());
} finally {
try {
Ghostscript.deleteInstance();
} catch (GhostscriptException e) {
opSuccess = "false";
System.out.println("ERROR: " + e.getMessage());
}
}
return opSuccess;
}
public synchronized static String doOCR(String pdfFile, String tiffFile) {
System.out.println("Called======doOCR " + pdfFile + " tiffFile " + tiffFile);
String opSuccess = "false";
ITesseract instance = new Tesseract();
List<RenderedFormat> formats = new ArrayList<RenderedFormat>();
formats.add(RenderedFormat.PDF);
try {
instance.setDatapath(TESSDATA_PREFIX);
instance.setLanguage("eng+ara");
instance.setOcrEngineMode(1);
instance.setPageSegMode(3);
instance.createDocuments(OCR_OUTPUT_FOLDER + File.separator + tiffFile,
OCR_OUTPUT_FOLDER + File.separator + pdfFile.replaceAll(".pdf", "").replaceAll(".PDF", ""),
formats);
opSuccess = "true";
} catch (TesseractException e) {
opSuccess = "false";
System.out.println("OCR ERROR: " + e.getMessage());
System.err.println(e.getMessage());
} catch (Exception e) {
opSuccess = "false";
System.out.println("OCR ERROR: " + e.getMessage());
System.err.println(e.getMessage());
}
return opSuccess;
}
public void processAllFiles(ArrayList<String> ipFiles) throws Exception {
java.util.List<Callable<String>> tasks = new ArrayList<Callable<String>>(ipFiles.size());
for (String ipFileName : ipFiles) {
System.out.println("11111111" + ipFileName);
tasks.add(processPartTask1(ipFileName));
}
ExecutorService es = Executors.newFixedThreadPool(ipFiles.size());
java.util.List<Future<String>> results = es.invokeAll(tasks);
for (Future<String> result : results)
System.out.println(result.get());
es.shutdown();
}
public Callable<String> processPartTask1(String ipFileName) {
return new Callable<String>() {
public String call() throws Exception {
System.out.println("22222222" + ipFileName);
String tiffFileName = "";
String inputFileName = ipFileName;
String returnvalue = "false";
if (inputFileName != null && inputFileName.length() > 0
&& inputFileName.toLowerCase().indexOf(".pdf") > 0) {
tiffFileName = ipFileName.replaceAll(".pdf", ".tif").replaceAll(".PDF", ".tif");
}
if (SerachablePDFConversionService.convertPDFToTiff(inputFileName, tiffFileName).equals("true")) {
System.out.println("PDF to tiff conversion is successful");
if (SerachablePDFConversionService.doOCR(inputFileName, tiffFileName).equals("true")) {
System.out.println("Searchable PDF creation is successful");
Files.move(FileSystems.getDefault().getPath(OCR_OUTPUT_FOLDER + File.separator + inputFileName),
FileSystems.getDefault().getPath(OCR_SUCCESS_FOLDER + File.separator + inputFileName),
StandardCopyOption.REPLACE_EXISTING);
System.out.println("End Time" + new Date());
returnvalue = "true " + inputFileName;
} else {
System.out.println("Searchable PDF creation is failed");
}
}
return returnvalue;// this needs to be changed
}
};
}
public void processPDFFiles() throws Exception {
File inputFiles[] = new File(OCR_INPUT_FOLDER).listFiles();
String inputFileName = "";
ArrayList<String> files = new ArrayList<String>();
try {
for (File inputFile : inputFiles) {
inputFileName = inputFile.getName();
files.add(inputFileName);
}
} catch (Exception e) {
}
SerachablePDFConversionService serachablePDFConversionService = new SerachablePDFConversionService();
serachablePDFConversionService.processAllFiles(files);
}
}