我想知道如何让 OCR 处理接受其他类型的文件。目前我只能处理图像文件(如 jpg、png),但无法处理 pdf。此外,我还想在 Tesseract 提取文本时显示进度条。有人能指导我一下吗?
这是我的代码:
/// <summary>
/// Handles the OCR button click: lets the user pick a file, runs Tesseract
/// over it, and shows the recognized text followed by the mean confidence
/// in <c>txtResult</c>.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btOCR_Click(object sender, EventArgs e)
{
    // Nothing to do if the user cancelled the dialog.
    if (openFileDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    try
    {
        // Pix, TesseractEngine and Page all wrap native resources; the using
        // blocks release them even if recognition throws. Nesting also ensures
        // the page is disposed before the engine that produced it.
        using (var img = Pix.LoadFromFile(openFileDialog.FileName))
        using (var ocr = new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube))
        using (var page = ocr.Process(img))
        {
            txtResult.Text = page.GetText() + page.GetMeanConfidence();
        }
    }
    catch (Exception ex)
    {
        // Surface any load/recognition failure to the user instead of crashing.
        MessageBox.Show(ex.Message, "Error");
    }
}