检测颠倒的PDF页面

时间:2015-08-05 12:46:47

标签: php tesseract ghostscript pdftk

我们正在使用 php、pypdfocr 和 pdftotext,对扫描或传真给我们的文档进行 OCR 并提取文本。问题出现在文档被上下颠倒地扫描或传真时,或者某些页面本应横向阅读(即文本在页面上旋转了 90 度)时。

我尝试过的事情:

  • in tessdata cp eng.traineddata osd.traineddata

对于文本旋转了 90 度的页面,生成的 OCR 文本层还不算太差;但如果页面是上下颠倒的,它会逐个单词进行 OCR 并就地翻转,因此如果文档中出现的是颠倒的 'This is a test',文本层读出来可能是 'test a is this'。

如果有办法检测页面是否颠倒,我就可以在运行 OCR 之前使用 pdftk 旋转页面(或者,如果页面已经做过 OCR,我可以先删除文本层,用 pdftk 旋转之后再重新运行一次 OCR)。

此时可以从linux CLI执行的任何解决方案都是可行的解决方案。

3 个答案:

答案 0 :(得分:4)

您可以轻松地通过 tesseract(>= 3.03?)获取有关页面方向的信息。例如:

$ tesseract image.png -  -psm 0

将产生此输出

Orientation: 3
Orientation in degrees: 90
Orientation confidence: 25.40
Script: 1 
Script confidence: 18.40

根据此信息,您可以调整图像的旋转。关于如何在 python 中执行此操作,可以参考脚本 Fix image rotation with tesseract。

答案 1 :(得分:1)

我有同样的问题。我的修复是创建一个简单的C ++应用程序,它将PNG文件名作为参数并自动旋转/校正它。

我的代码是

#include <iostream>
#include <cmath>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

using namespace std;

int main(int argc, char **argv)
{

    if (argc != 2) {
        cerr << "usage: " << argv[0] << " <image>\n";
        exit(1);
    }

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(NULL, "eng")) {
        cerr << "Could not initialize tesseract.\n";
        exit(2);
    }

    const char* inputfile = argv[1];
    tesseract::Orientation orientation;
    tesseract::WritingDirection direction;
    tesseract::TextlineOrder order;
    float deskew_angle;

    PIX *image = pixRead(inputfile);
    if (image == NULL) {
        cerr << "could not open " << inputfile << endl;
        return -2;
    }

    api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
    api->SetImage(image);
    api->Recognize(0);

    tesseract::PageIterator* it =  api->AnalyseLayout();
    it->Orientation(&orientation, &direction, &order, &deskew_angle);
    cout << "Orientation: " << orientation << 
            "\nWritingDirection: " << direction <<
            "\nTextlineOrder: " << order << 
            "\nDeskew angle: " << deskew_angle << "\n";

    PIX* pixd = NULL;
    switch (orientation) {
        case 0:
            cout << "image in the correct position, nothing to do\n";
            if (fabs(deskew_angle) > 0.0001f) {
                cout << "deskewing...\n";
                pixd = pixRotate(image, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
            }
            break;
        case 1:
            cout << "rotating image by 270 degrees\n";
            pixd = pixRotate90(image, -1);
            if (deskew_angle > 0.0001f) {
                cout << "deskewing...\n";
                pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
            }
            break;
        case 2:
            cout << "rotating image by 180 degrees\n";
            pixd = pixRotate180(NULL, image);
            if (deskew_angle > 0.0001f) {
                cout << "deskewing...\n";
                pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
            }
            break;
        case 3:
            cout << "rotating image by 90 degrees\n";
            pixd = pixRotate90(image, 1);
            if (deskew_angle > 0.0001f) {
                cout << "deskewing...\n";
                pixd = pixRotate(pixd, -deskew_angle, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0);
            }
            break;
    }

    pixDestroy(&image);

    if (pixd != NULL) {
        pixWrite(inputfile, pixd, IFF_PNG);
        pixDestroy(&pixd);
    }

    return 0;
}

您可以使用以下命令进行编译:

g++ -o tesseract_fixposition tesseract_fixposition.cpp -llept -ltesseract

依赖项是libtesseract和libleptonica。我测试了Tesseract版本3.03和3.04,以及Leptonica 1.72。我处理了几千张图片,没有发现任何不正确的识别。

希望这有帮助!

答案 2 :(得分:1)

如果速度是个问题,您不必使用 tesseract 来修正页面方向,可以直接使用 leptonica 的函数,像这样:

/*
 * Compile with:
 *     g++ fixorientation.cpp -o fixorientation -llept
 *
 */

#include <cstdio>
#include <cstring>
#include <string>

#include <leptonica/allheaders.h>

int main(int argc, char *argv[]) {
    const char* filename = NULL;
    const char* outfile = NULL;
    l_int32   orient, format;
    l_int32  alt_rot = -1;
    l_float32 upconf1, leftconf1;
    PIX       *fpixs, *pixs;

    if (argc < 1) {
        fprintf(stderr, "Usage is:\n\t%s -f filename [-o output]\n", argv[0]);
        return(1);
    } else {
        for (int i = 1; i < argc; i++) {
            if (i + 1 < argc) {
                if (strcmp(argv[i], "-f") == 0) {
                    filename = argv[i + 1];
                } else if (strcmp(argv[i], "-o") == 0) {
                    outfile = argv[i + 1];
                }
            }
        }
    }

    if (filename) {
        pixs = pixRead(filename);
    } else {
        fprintf(stderr, "Usage is:\n\t%s -f filename [-o output]\n", argv[0]);
        return(1);
    }

    if (pixs == NULL) {
        fprintf(stderr, "Unsupported image type.\n");
        return(3);
    }
    format = pixGetInputFormat(pixs);

    fpixs = pixConvertTo1(pixs, 130);
    pixOrientDetect(fpixs, &upconf1, &leftconf1, 0, 0);
    makeOrientDecision(upconf1, leftconf1, 0, 0, &orient, 1);

    if (orient == L_TEXT_ORIENT_UNKNOWN) {
        fprintf(stdout, "Confidence is low; no determination is made. "
                "But maybe there is %1 deg rotation.\n", alt_rot);
    } else if (orient == L_TEXT_ORIENT_UP) {
        fprintf(stdout, "Text is rightside-up\n");
        alt_rot = 0;
    } else if (orient == L_TEXT_ORIENT_LEFT) {
        fprintf(stdout, "Text is rotated 90 deg ccw\n");
        alt_rot = 1;
    } else if (orient == L_TEXT_ORIENT_DOWN) {
        fprintf(stdout, "Text is upside-down\n");
        alt_rot = 2;
    } else {  /* orient == L_TEXT_ORIENT_RIGHT */
        fprintf(stdout, "Text is rotated 90 deg cw\n");
        alt_rot = 3;
    }

    if (alt_rot > -1) {
        fpixs = pixRotateOrth(pixs, alt_rot);
        if (outfile) {
            pixWrite(outfile, fpixs, format);
        } else {
            char savefile[strlen("fixed_") + strlen(filename) + 1];
            strcpy(savefile, "fixed_");
            strcat(savefile, filename);
            fprintf(stdout, "Output save to %s\n", savefile);
            pixWrite(savefile, fpixs, format);

        }
    } else {
        return(2);
    }
    pixDestroy(&fpixs);
    pixDestroy(&pixs);
    return(0);
}