我计划在 Visual Studio 中开发 Windows 或 Web 应用程序,使用 Tesseract OCR 读取 TIFF/PDF 图像。我尝试了下面的代码,但手写的 TIFF 图像无法被正确识别,输出的是乱码字符。我的最终目标是识别问题 4、5、6、7A、7B、7C、7D、7E、7F 是否被勾选,以及评论栏中填写的内容(参考我的样本 TIFF)。
请建议应该如何识别。使用的代码如下:
引用的 DLL:c:\Samples\packages\Tesseract.2.3.0.0\lib\net40\Tesseract.dll
WebForm1.aspx.cs 中的代码:
/// <summary>
/// Handles the submit button click: runs Tesseract OCR on the uploaded image
/// and shows the recognized text together with the mean confidence.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// Nothing is caught here, so any exception raised while loading or
/// processing the image propagates to the caller.
/// </remarks>
protected void submitFile_ServerClick(object sender, EventArgs e)
{
    var upload = imageFile.PostedFile;

    // No file was posted, or it is empty: leave the page as it is.
    if (upload == null || upload.ContentLength <= 0)
    {
        return;
    }

    // The uploaded stream is decoded into a Bitmap first and then converted,
    // because Pix cannot be loaded from a stream directly.
    using (var ocrEngine = new TesseractEngine(Server.MapPath(@"~/tessdata"), "eng", EngineMode.Default))
    using (var bitmap = new System.Drawing.Bitmap(upload.InputStream))
    using (var pixImage = PixConverter.ToPix(bitmap))
    using (var ocrPage = ocrEngine.Process(pixImage))
    {
        var confidence = ocrPage.GetMeanConfidence();
        meanConfidenceLabel.InnerText = string.Format("{0:P}", confidence);
        resultText.InnerText = ocrPage.GetText();
    }

    // Swap the upload form for the result view.
    inputPanel.Visible = false;
    resultPanel.Visible = true;
}
/// <summary>
/// Handles the restart button click: shows the input panel again and hides
/// the result panel.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void restartButton_ServerClick(object sender, EventArgs e)
{
    inputPanel.Visible = true;
    resultPanel.Visible = false;
}