您好,我使用的是 Tesseract 3.02.02,但它对一张非常清晰的图像给出了错误的识别结果。
示例图片:
代码:
// Create the OCR engine and print the Tesseract / Leptonica versions in use.
tesseract::TessBaseAPI *myOCR = new tesseract::TessBaseAPI();
cout << myOCR->Version() << endl;
cout << getLeptonicaVersion() << endl;

// Results of this snippet. Both stay NULL when initialisation or image
// loading fails, so later code can test them instead of running OCR on an
// unusable engine or an empty image.
Pix *pixx = NULL;
const char *text1 = NULL;

// Init() signals failure with a non-zero return value.
const bool ocrReady = (myOCR->Init(NULL, "eng") == 0);
if (!ocrReady) {
    qDebug() << "Could not initialize tesseract";
}

// Treat the image as a single character (named enumerator for the value 10).
tesseract::PageSegMode pagesegmode = tesseract::PSM_SINGLE_CHAR;

// imread() hands back an empty Mat when the file cannot be read; the
// OpenCV calls below must not be run on it.
Mat imm = imread("/home/de/Desktop/0.png");
if (imm.empty()) {
    qDebug() << "Could not load image";
}

if (ocrReady && !imm.empty()) {
    // Only digits are accepted as recognition results.
    myOCR->SetVariable("tessedit_char_whitelist", "0123456789");
    myOCR->SetPageSegMode(pagesegmode);

    // Grayscale, then Otsu binarisation (the threshold argument 0 is ignored
    // because CV_THRESH_OTSU selects the threshold automatically).
    cvtColor(imm, imm, CV_BGR2GRAY);
    threshold(imm, imm, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // The project helper runs on the inverted image, which is then inverted
    // back. NOTE(review): presumably it removes blobs touching the image
    // border and expects white-on-black input -- confirm against its source.
    imm = ~imm;
    common::deleteBorderObjects(&imm);
    imm = ~imm;

    pixx = mat8ToPix(&imm); // convert Mat to pix
    myOCR->SetImage(pixx);
    text1 = myOCR->GetUTF8Text();
}
结果是1。
我认为图像非常清晰。
问题出在哪里?是 Tesseract 的参数设置有问题吗?
答案 0 :(得分:0)
我在自己的机器上做了一些额外处理后再试,识别结果是 0。
// Verifies that clicking a link downloads a file: a second browser instance,
// configured to save into the user's "Downloads" folder, opens the current
// page, clicks the link, and the folder's file count is compared before/after.
string currentPage = Browser.Current.Url;

// Resolve <user profile>/Downloads and make sure it exists.
string userPath = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
string downloadPath = Path.Combine(userPath, "Downloads");
DirectoryInfo dirInfo = new DirectoryInfo(downloadPath);
if (!dirInfo.Exists)
{
    dirInfo.Create();
}

// Baseline file count taken before the download is triggered.
int directoryFiles = dirInfo.EnumerateFiles().Count();
string elementXpath = "//div[@id='myDiv']/div/div/div[@class='atalhos']/a[1]";

bool isFirefox = Browser.Current is FirefoxDriver;
bool isChrome = Browser.Current is ChromeDriver;

IWebDriver browserDriver;
if (isChrome)
{
    ChromeOptions chromeOptions = new ChromeOptions();
    chromeOptions.AddUserProfilePreference("download.default_directory", downloadPath);
    chromeOptions.AddUserProfilePreference("disable-popup-blocking", "true");
    browserDriver = new ChromeDriver(chromeOptions);
}
else if (isFirefox)
{
    FirefoxProfile profile = new FirefoxProfile();
    // folderList = 2 tells Firefox to use a custom download directory, which
    // must be supplied through browser.download.dir; otherwise the files need
    // not end up in the folder this check is counting.
    profile.SetPreference("browser.download.folderList", 2);
    profile.SetPreference("browser.download.dir", downloadPath);
    profile.SetPreference("browser.helperApps.neverAsk.saveToDisk", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    browserDriver = new FirefoxDriver(profile);
}
else
{
    // Fail with a clear message instead of a NullReferenceException below.
    throw new NotSupportedException("Download check supports only ChromeDriver and FirefoxDriver.");
}

try
{
    browserDriver.Navigate().GoToUrl(currentPage);

    // Wait up to 15 seconds for the link to become visible; the wait returns
    // the element, so no second lookup is needed.
    WebDriverWait wait = new WebDriverWait(browserDriver, TimeSpan.FromSeconds(15));
    IWebElement elemento = wait.Until(ExpectedConditions.ElementIsVisible(By.XPath(elementXpath)));
    elemento.Click();

    // Fixed pause to give the download time to land on disk.
    Thread.Sleep(7000);

    dirInfo = new DirectoryInfo(downloadPath);
    int currentFiles = dirInfo.EnumerateFiles().Count();
    Assert.Greater(currentFiles, directoryFiles);
}
finally
{
    // Always close the extra browser, even when the wait or assertion fails.
    browserDriver.Quit();
}
请尝试在将图像输入 Tesseract 之前,先查看一下该图像。