Question

我正在使用 Google Tesseract OCR。它对英语能够正确识别，但对孟加拉语输出的却是乱码。我该如何解决这个问题？为了说明情况，我附上了我的代码。我从这里下载并安装了它；在安装过程中，我训练了孟加拉语数据。请帮帮我。

#include<baseapi.h>
#include <allheaders.h>
#include <iostream>
#include <fstream>
using namespace std;

int main(void){

    tesseract::TessBaseAPI api;
    api.Init("", "ben", tesseract::OEM_DEFAULT);
    api.SetPageSegMode(static_cast<tesseract::PageSegMode>(7));
    api.SetOutputName("out");

    cout<<"File name:";
    char image[256];
    cin>>image;
    PIX   *pixs = pixRead(image);

    STRING text_out;
    api.ProcessPages(image, NULL, 0, &text_out);

    //cout<<text_out.string();
    ofstream myfile;
    myfile.open ("example.txt");
    myfile << text_out.string();
    myfile.close();
}

在 Visual Studio C++ 中使用 Tesseract OCR 识别孟加拉语

0 个答案: