我试图通过MODI对所选目录中的图像执行批量OCR。这是代码。
/// <summary>
/// Click handler: asks the user to pick a folder and, if one is confirmed,
/// starts the batch OCR run.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e) {
    // CommonOpenFileDialog is IDisposable; release it as soon as the handler is done.
    using (CommonOpenFileDialog dialog = new CommonOpenFileDialog()) {
        dialog.InitialDirectory = "C:\\Users";
        dialog.IsFolderPicker = true;

        if (dialog.ShowDialog() != CommonFileDialogResult.Ok) {
            return;
        }

        invoicePath = dialog.FileName;
        // NOTE(review): the picked folder is stored in invoicePath, yet OCR runs on
        // imageDirectory. Both fields are declared outside this block - confirm that
        // imageDirectory really is the directory that should be processed here.
        CheckFileAndDoOCR(imageDirectory);
    }
}
/// <summary>
/// Runs OCR on every JPEG file directly inside <paramref name="directoryPath"/>,
/// writes the recognized text to a .txt file with the same base name next to the
/// image, and then deletes the image.
/// </summary>
/// <param name="directoryPath">Directory whose files are scanned (no recursion).</param>
/// <returns>
/// The text recognized in the last image processed, or an empty string when the
/// directory contains no JPEG file.
/// </returns>
public string CheckFileAndDoOCR(string directoryPath) {
    string TheTxt = "";
    foreach (string filePath in Directory.GetFiles(directoryPath)) {
        // Case-insensitive comparison so ".Jpg" and similar spellings are handled too.
        if (!string.Equals(Path.GetExtension(filePath), ".jpg", StringComparison.OrdinalIgnoreCase)) {
            // Not an image we process: leave the file untouched (in particular, do not delete it).
            continue;
        }

        // Hand the OCR engine an absolute path, as the original FileInfo.FullName did.
        string imagePath = Path.GetFullPath(filePath);
        TheTxt = DoOCR(imagePath);

        // ChangeExtension swaps only the trailing extension, so a name such as
        // "scan.jpg.backup.jpg" keeps its inner ".jpg".
        string txtFileName = Path.ChangeExtension(imagePath, ".txt");

        // WriteAllText creates or truncates the target, so text from a previous,
        // longer result cannot survive at the end of the file; it also closes the
        // file even when the write fails.
        File.WriteAllText(txtFileName, TheTxt);

        // Only images that were actually OCR'd are removed.
        try {
            File.Delete(imagePath);
        }
        catch (Exception ex) {
            MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    return TheTxt;
}
/// <summary>
/// Opens the image at <paramref name="FullPath"/> as a MODI document, runs English
/// OCR on it and returns the text of the document's first image.
/// </summary>
/// <param name="FullPath">Path of the image file to recognize.</param>
/// <returns>The text MODI reports in the layout of the first image.</returns>
public string DoOCR(string FullPath) {
    MODI.Document md = new MODI.Document();
    MODI.Image image = null;
    bool opened = false;
    try {
        md.Create(FullPath);
        opened = true;
        md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, false, false);
        image = (MODI.Image)md.Images[0];
        return image.Layout.Text;
    }
    finally {
        // Release the document even when Create/OCR throws; otherwise the COM
        // objects stay alive and the caller may be unable to delete the image.
        image = null;
        if (opened) {
            md.Close(false);
        }
        md = null;
        // Kept from the original: forces the COM wrappers (including the temporary
        // Layout object) to be finalized before returning to the caller.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}
例如,如何只对图像中的特定区域(如下面的矩形)执行OCR?
// Fixed region of interest for the OCR (presumably pixel coordinates within the image - TODO confirm).
Rectangle Area = new Rectangle {
    X = 1367,
    Y = 420,
    Width = 411,
    Height = 57
};
我只需要从每张图像中提取同一个区域(矩形)内的数据。我怎样才能做到这一点?我曾经尝试过IronOCR和Tesseract,它们都运行得很好,但是有人要求我使用MODI。请帮忙。