我在 Spring MVC 项目中使用 Tesseract OCR 将一些图像转换为可搜索的 PDF 文件。
我使用了 TessAPI 和 TessResultRenderer,它们在普通的 Java 项目中运行得非常好。但是,当我把同样的代码放进 Spring MVC 项目并部署到 Tomcat 7 时,它就不起作用了。出错的是下面这个调用:
int result = api.TessBaseAPIProcessPages1(handle, imagepath, null, 0,
renderer);
我得到了以下错误信息:
Error during processing.
以下是我的代码:
/**
 * Runs Tesseract OCR on an image and writes the PDF renderer's output next to the source image
 * (same directory, same base name, {@code .pdf} extension).
 *
 * <p>Each call to {@link #convert(String)} creates and releases its own native Tesseract handle
 * and renderer chain, so an instance can be reused and separate instances do not share a handle.
 */
public class ImageToPDF {
    /** Data path handed to {@code TessBaseAPIInit4}. */
    private static final String DATAPATH = "F:/Projects Dev/Pfe Projects/GedWeb/.";

    /** Language code handed to {@code TessBaseAPIInit4}. */
    private static final String LANGUAGE = "fra";

    /** Tesseract API binding obtained once per instance. */
    private final TessAPI api;

    public ImageToPDF() {
        super();
        api = new TessDllAPIImpl().getInstance();
    }

    /** Kept for interface compatibility; only delegates to {@link Object#finalize()}. */
    @Override
    public void finalize() throws Throwable {
        super.finalize();
    }

    /**
     * Runs OCR on the given image and writes the PDF output beside it.
     *
     * <p>Failures are reported on {@code System.err}; nothing is thrown for an initialization or
     * processing failure. The native handle is always released before returning.
     *
     * @param imagepath path of the image file to process
     */
    public void convert(String imagepath) {
        File image = new File(imagepath);
        String outputBase = image.getParent() + File.separator
                + FilenameUtils.removeExtension(image.getName());
        String output = outputBase + ".txt";

        int setOnlyInitParams = FALSE;
        int oem = TessOcrEngineMode.OEM_DEFAULT;
        PointerByReference configs = null;
        int configsSize = 0;

        // NOTE(review): "Relevé" is passed as the value of load_system_dawg and an empty string
        // as tessedit_char_whitelist; confirm these are the intended values.
        String[] params = { "load_system_dawg", "tessedit_char_whitelist" };
        String[] vals = { "Relevé", "" };
        PointerByReference varsVec = new PointerByReference();
        varsVec.setPointer(new StringArray(params));
        PointerByReference varsValues = new PointerByReference();
        varsValues.setPointer(new StringArray(vals));
        NativeSize varsVecSize = new NativeSize(params.length);

        // A fresh handle per call: the handle is deleted when this method returns, so keeping
        // it in a (static) field would leave later calls with a freed native object.
        TessBaseAPI handle = api.TessBaseAPICreate();
        try {
            api.TessBaseAPISetOutputName(handle, output);
            int rc = api.TessBaseAPIInit4(handle, DATAPATH, LANGUAGE, oem, configs,
                    configsSize, varsVec, varsValues, varsVecSize,
                    setOnlyInitParams);
            System.out.println(rc);
            if (rc != 0) {
                System.err.println("Could not initialize tesseract.");
                return;
            }
            renderAndSave(handle, imagepath, outputBase);
        } finally {
            api.TessBaseAPIDelete(handle);
        }
    }

    /**
     * Builds the renderer chain (hOCR, box text, text, PDF), processes the image and writes the
     * PDF renderer's output to {@code outputBase + ".pdf"}.
     */
    private void renderAndSave(TessBaseAPI handle, String imagepath, String outputBase) {
        // Keep a reference to the head of the chain: the loop below walks a separate cursor, so
        // the head is still available for deletion afterwards.
        TessResultRenderer head = api.TessHOcrRendererCreate();
        try {
            api.TessResultRendererInsert(head, api.TessBoxTextRendererCreate());
            api.TessResultRendererInsert(head, api.TessTextRendererCreate());
            String dataPath = api.TessBaseAPIGetDatapath(handle);
            api.TessResultRendererInsert(head, api.TessPDFRendererCreate(dataPath));

            api.TessResultRendererBeginDocument(head, imagepath);
            int result = api.TessBaseAPIProcessPages1(handle, imagepath, null, 0, head);
            api.TessResultRendererEndDocument(head);
            System.out.println(handle.toString());
            System.out.println(head.toString());
            if (result == FALSE) {
                System.err.println("Error during processing.");
                return;
            }

            for (TessResultRenderer current = head; current != null;
                    current = api.TessResultRendererNext(current)) {
                String ext = api.TessResultRendererExtention(current).getString(0);
                System.out.println(String.format(
                        "TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
                        ext,
                        api.TessResultRendererTitle(current).getString(0),
                        api.TessResultRendererImageNum(current)));

                PointerByReference data = new PointerByReference();
                IntByReference dataLength = new IntByReference();
                int got = api.TessResultRendererGetOutput(current, data, dataLength);
                // Only the PDF renderer's output is persisted; other renderers are skipped.
                if (got == TRUE && ext.equals("pdf")) {
                    byte[] bytes = data.getValue().getByteArray(0, dataLength.getValue());
                    writeBytes(new File(outputBase + "." + ext), bytes);
                }
            }
        } finally {
            // Presumably deleting the head also releases the renderers chained to it; verify
            // against the Tesseract version in use.
            api.TessDeleteResultRenderer(head);
        }
    }

    /** Writes {@code bytes} to {@code target}, creating parent directories when necessary. */
    private static void writeBytes(File target, byte[] bytes) {
        File parentDir = target.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        // try-with-resources closes the stream even when write() fails.
        try (FileOutputStream out = new FileOutputStream(target.getAbsoluteFile())) {
            out.write(bytes);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
有什么建议吗?