我们正在使用php、pypdfocr和pdftotext进行OCR,并从扫描或传真给我们的文档中提取文本。问题出现在文档被上下颠倒地扫描或传真时,或者某些页面本应横向阅读时(此时文本在页面上旋转了90度)。
我尝试过的事情:
对于文本旋转了90度的页面,生成的OCR文本层还不错;但对于上下颠倒的页面,它会对每个单词分别进行OCR并原地翻转,因此如果文档中出现了上下颠倒的'This is a test',文本层读出来可能是'test a is This'。
如果有办法检测页面是否颠倒,我就可以在运行OCR之前使用pdftk旋转页面(或者,如果页面已经做过OCR,我可以先删除文本层,用pdftk旋转之后再重新运行一次OCR)。
此时可以从linux CLI执行的任何解决方案都是可行的解决方案。
答案 0 :(得分:4)
您可以轻松地通过tesseract(>= 3.03?)获取有关页面方向的信息。例如:
$ tesseract image.png - -psm 0
将产生此输出
Orientation: 3
Orientation in degrees: 90
Orientation confidence: 25.40
Script: 1
Script confidence: 18.40
根据此信息,您可以调整图像的旋转。有关如何在Python中执行此操作的示例,可参见脚本"Fix image rotation with tesseract"。
答案 1 :(得分:1)
我有同样的问题。我的修复是创建一个简单的C ++应用程序,它将PNG文件名作为参数并自动旋转/校正它。
我的代码是
#include <iostream>
#include <cmath>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
using namespace std;
int main(int argc, char **argv)
{
if (argc != 2) {
cerr << "usage: " << argv[0] << " <image>\n";
exit(1);
}
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
cerr << "Could not initialize tesseract.\n";
exit(2);
}
const char* inputfile = argv[1];
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
PIX *image = pixRead(inputfile);
if (image == NULL) {
cerr << "could not open " << inputfile << endl;
return -2;
}
api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
api->SetImage(image);
api->Recognize(0);
tesseract::PageIterator* it = api->AnalyseLayout();
it->Orientation(&orientation, &direction, &order, &deskew_angle);
cout << "Orientation: " << orientation <<
"\nWritingDirection: " << direction <<
"\nTextlineOrder: " << order <<
"\nDeskew angle: " << deskew_angle << "\n";
PIX* pixd = NULL;
switch (orientation) {
case 0:
cout << "image in the correct position, nothing to do\n";
if (fabs(deskew_angle) > 0.0001f) {
cout << "deskewing...\n";
pixd = pixRotate(image, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
}
break;
case 1:
cout << "rotating image by 270 degrees\n";
pixd = pixRotate90(image, -1);
if (deskew_angle > 0.0001f) {
cout << "deskewing...\n";
pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
}
break;
case 2:
cout << "rotating image by 180 degrees\n";
pixd = pixRotate180(NULL, image);
if (deskew_angle > 0.0001f) {
cout << "deskewing...\n";
pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
}
break;
case 3:
cout << "rotating image by 90 degrees\n";
pixd = pixRotate90(image, 1);
if (deskew_angle > 0.0001f) {
cout << "deskewing...\n";
pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
}
break;
}
pixDestroy(&image);
if (pixd != NULL) {
pixWrite(inputfile, pixd, IFF_PNG);
pixDestroy(&pixd);
}
return 0;
}
您可以使用以下命令进行编译:
g++ -o tesseract_fixposition tesseract_fixposition.cpp -llept -ltesseract
依赖项是libtesseract和libleptonica。我测试了Tesseract版本3.03和3.04,以及Leptonica 1.72。我处理了几千张图片,没有发现任何不正确的识别。
希望这有帮助!
答案 2 :(得分:1)
如果速度是个问题,您不需要使用tesseract来修复页面方向,可以直接使用leptonica函数。像这样:
/*
* Compile with:
* g++ fixorientation.cpp -o fixorientation -llept
*
*/
#include <cstring>
#include <leptonica/allheaders.h>
int main(int argc, char *argv[]) {
const char* filename = NULL;
const char* outfile = NULL;
l_int32 orient, format;
l_int32 alt_rot = -1;
l_float32 upconf1, leftconf1;
PIX *fpixs, *pixs;
if (argc < 1) {
fprintf(stderr, "Usage is:\n\t%s -f filename [-o output]\n", argv[0]);
return(1);
} else {
for (int i = 1; i < argc; i++) {
if (i + 1 < argc) {
if (strcmp(argv[i], "-f") == 0) {
filename = argv[i + 1];
} else if (strcmp(argv[i], "-o") == 0) {
outfile = argv[i + 1];
}
}
}
}
if (filename) {
pixs = pixRead(filename);
} else {
fprintf(stderr, "Usage is:\n\t%s -f filename [-o output]\n", argv[0]);
return(1);
}
if (pixs == NULL) {
fprintf(stderr, "Unsupported image type.\n");
return(3);
}
format = pixGetInputFormat(pixs);
fpixs = pixConvertTo1(pixs, 130);
pixOrientDetect(fpixs, &upconf1, &leftconf1, 0, 0);
makeOrientDecision(upconf1, leftconf1, 0, 0, &orient, 1);
if (orient == L_TEXT_ORIENT_UNKNOWN) {
fprintf(stdout, "Confidence is low; no determination is made. "
"But maybe there is %1 deg rotation.\n", alt_rot);
} else if (orient == L_TEXT_ORIENT_UP) {
fprintf(stdout, "Text is rightside-up\n");
alt_rot = 0;
} else if (orient == L_TEXT_ORIENT_LEFT) {
fprintf(stdout, "Text is rotated 90 deg ccw\n");
alt_rot = 1;
} else if (orient == L_TEXT_ORIENT_DOWN) {
fprintf(stdout, "Text is upside-down\n");
alt_rot = 2;
} else { /* orient == L_TEXT_ORIENT_RIGHT */
fprintf(stdout, "Text is rotated 90 deg cw\n");
alt_rot = 3;
}
if (alt_rot > -1) {
fpixs = pixRotateOrth(pixs, alt_rot);
if (outfile) {
pixWrite(outfile, fpixs, format);
} else {
char savefile[strlen("fixed_") + strlen(filename) + 1];
strcpy(savefile, "fixed_");
strcat(savefile, filename);
fprintf(stdout, "Output save to %s\n", savefile);
pixWrite(savefile, fpixs, format);
}
} else {
return(2);
}
pixDestroy(&fpixs);
pixDestroy(&pixs);
return(0);
}