如何将图像的单个字母旋转到正确的方向以获得最佳OCR?

时间:2017-01-24 21:40:56

标签: c# tesseract aforge

在我之前的问题(my previous question)中,我转换了这张图片:

enter image description here

进入这个:

enter image description here

Tesseract OCR解释为:

1O351

在图像周围放置一个框架

enter image description here

实际上改善了OCR结果。

 1CB51

但是,我需要正确地将所有5个字符都放到OCR中,所以作为一个实验,我使用Paint.NET来旋转并将每个字母对齐到正确的方向:

enter image description here

得出正确答案:

1CB52

我如何在C#中执行此更正?

我已经对各种文本对齐(去歪斜)算法进行了一些研究,但它们都假设源图像中存在成行的文本,可以从这些文本行推导出旋转角度,而这些文本行中字母之间的间距和方向关系本身已经是正确的。

1 个答案:

答案 0 :(得分:11)

您可以使用以下code project article中的代码来细分每个字符。然而,当你试图单独去歪斜这些角色时,你获得的任何结果都不会很好,因为没有太多的信息可以解决。

我尝试使用AForge.NET的HoughLineTransformation类,得到的角度范围在80到90度之间。所以我尝试使用以下代码来纠正它们:

f(n-1)

使用RotateImage method taken from here.然而,结果似乎并不是最好的。也许你可以尝试让它们变得更好。

以下是代码项目文章中的代码供您参考。我对它进行了一些更改,使其行为更安全,例如在LockBits周围添加try-finally,并使用using语句等正确处理对象。

/// <summary>
/// Splits <paramref name="targetBitmap"/> into individual characters with <c>CCL</c>,
/// estimates each character's angle with a Hough line transform, and draws the rotated
/// character back onto the same bitmap at the character's original location.
/// </summary>
/// <param name="targetBitmap">The image to deskew. It is drawn on in place.</param>
/// <returns>The same <paramref name="targetBitmap"/> instance that was passed in.</returns>
private static Bitmap DeskewImageByIndividualChars(Bitmap targetBitmap)
{
    IDictionary<Rectangle, Bitmap> characters = new CCL().Process(targetBitmap);

    using (Graphics g = Graphics.FromImage(targetBitmap))
    {
        foreach (var character in characters)
        {
            // The per-character bitmaps are local to this method and never returned,
            // so each one is released as soon as it has been processed.
            using (Bitmap characterBitmap = character.Value)
            {
                bool foundLines;
                double angle;

                BitmapData bitmapData = characterBitmap.LockBits(
                    new Rectangle(Point.Empty, characterBitmap.Size),
                    ImageLockMode.ReadWrite,
                    PixelFormat.Format8bppIndexed);
                try
                {
                    HoughLineTransformation hlt = new HoughLineTransformation();
                    hlt.ProcessImage(bitmapData);

                    // Averaging an empty sequence throws, so check whether any line was detected first.
                    var thetas = hlt.GetLinesByRelativeIntensity(0.5).Select(l => l.Theta).ToList();
                    foundLines = thetas.Count > 0;
                    angle = foundLines ? thetas.Average() : 0d;
                }
                finally
                {
                    characterBitmap.UnlockBits(bitmapData);
                }

                if (!foundLines)
                {
                    // No angle estimate is available: leave this character's original pixels untouched.
                    continue;
                }

                using (Bitmap rotated = RotateImage(characterBitmap, 90 - angle, Color.White))
                {
                    g.DrawImage(rotated, character.Key.Location);
                }
            }
        }
    }

    return targetBitmap;
}

try-finally

使用上面的代码我得到以下输入/输出:

Input Output

正如您所看到的,B旋转得很好,但其他字符的效果并不好。

尝试对个别角色进行去歪斜的另一种方法是使用上面的分割例程找到位置。然后将每个角色分别传递到您的识别引擎,看看这是否会改善您的结果。

我使用以下方法,利用CCL类中的List<Pixel>来查找角色的角度。它的工作原理是找到“左下”和“右下”之间的角度。如果角色反过来旋转,我还没有测试它是否有效。

using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;

namespace ConnectedComponentLabeling
{
    /// <summary>
    /// Connected-component labeling: groups neighboring non-background pixels of a bitmap
    /// and returns one cropped bitmap per group, keyed by the group's bounds.
    /// </summary>
    /// <remarks>
    /// Relies on the <c>Pixel</c> and <c>Label</c> types, which are declared elsewhere.
    /// </remarks>
    public class CCL
    {
        private Bitmap _input;
        private int[,] _board;

        /// <summary>
        /// Labels <paramref name="input"/> and builds one bitmap per detected pattern.
        /// </summary>
        /// <param name="input">The image to segment.</param>
        /// <returns>A map from each pattern's bounds to a bitmap containing only that pattern.</returns>
        public IDictionary<Rectangle, Bitmap> Process(Bitmap input)
        {
            _input = input;
            _board = new int[_input.Width, _input.Height];

            Dictionary<int, List<Pixel>> patterns = Find();
            var images = new Dictionary<Rectangle, Bitmap>();

            foreach (KeyValuePair<int, List<Pixel>> pattern in patterns)
            {
                using (Bitmap bmp = CreateBitmap(pattern.Value))
                {
                    images.Add(GetBounds(pattern.Value), (Bitmap)bmp.Clone());
                }
            }

            return images;
        }

        /// <summary>
        /// Returns true when the pixel is fully opaque white (A, R, G and B all 255).
        /// </summary>
        protected virtual bool CheckIsBackGround(Pixel currentPixel)
        {
            return currentPixel.color.A == 255
                && currentPixel.color.R == 255
                && currentPixel.color.G == 255
                && currentPixel.color.B == 255;
        }

        /// <summary>
        /// Scans the input row by row, assigns a label to every non-background pixel,
        /// records label equivalences, and returns the pixels grouped by root label
        /// after dropping patterns that touch the image border.
        /// </summary>
        private unsafe Dictionary<int, List<Pixel>> Find()
        {
            int labelCount = 1;
            var allLabels = new Dictionary<int, Label>();

            BitmapData imageData = _input.LockBits(
                new Rectangle(0, 0, _input.Width, _input.Height),
                ImageLockMode.ReadOnly,
                PixelFormat.Format24bppRgb);
            try
            {
                int bytesPerPixel = 3;
                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                for (int i = 0; i < _input.Height; i++)
                {
                    byte* row = scan0 + (i * stride);

                    for (int j = 0; j < _input.Width; j++)
                    {
                        // The locked 24bpp data is read as blue, green, red for each pixel.
                        int bIndex = j * bytesPerPixel;
                        int gIndex = bIndex + 1;
                        int rIndex = bIndex + 2;

                        byte pixelR = row[rIndex];
                        byte pixelG = row[gIndex];
                        byte pixelB = row[bIndex];

                        Pixel currentPixel = new Pixel(new Point(j, i), Color.FromArgb(pixelR, pixelG, pixelB));

                        if (CheckIsBackGround(currentPixel))
                        {
                            continue;
                        }

                        IEnumerable<int> neighboringLabels = GetNeighboringLabels(currentPixel);
                        int currentLabel;

                        if (!neighboringLabels.Any())
                        {
                            // No labeled neighbor: start a new label.
                            currentLabel = labelCount;
                            allLabels.Add(currentLabel, new Label(currentLabel));
                            labelCount++;
                        }
                        else
                        {
                            // Take the smallest root label among the neighbors and join the others to it.
                            currentLabel = neighboringLabels.Min(n => allLabels[n].GetRoot().Name);
                            Label root = allLabels[currentLabel].GetRoot();

                            foreach (var neighbor in neighboringLabels)
                            {
                                if (root.Name != allLabels[neighbor].GetRoot().Name)
                                {
                                    allLabels[neighbor].Join(allLabels[currentLabel]);
                                }
                            }
                        }

                        _board[j, i] = currentLabel;
                    }
                }
            }
            finally
            {
                _input.UnlockBits(imageData);
            }

            Dictionary<int, List<Pixel>> patterns = AggregatePatterns(allLabels);
            patterns = RemoveIntrusions(patterns, _input.Width, _input.Height);

            return patterns;
        }

        /// <summary>
        /// Drops every pattern that has at least one pixel on the image border.
        /// </summary>
        /// <param name="patterns">Patterns keyed by root label.</param>
        /// <param name="width">Image width in pixels.</param>
        /// <param name="height">Image height in pixels.</param>
        /// <returns>A new dictionary containing only the patterns that do not touch the border.</returns>
        private Dictionary<int, List<Pixel>> RemoveIntrusions(Dictionary<int, List<Pixel>> patterns, int width, int height)
        {
            var patternsCleaned = new Dictionary<int, List<Pixel>>();

            foreach (var pattern in patterns)
            {
                // The right-edge test must compare X with the width; comparing Y with the
                // width missed patterns on the right edge.
                bool touchesBorder = pattern.Value.Any(item =>
                    item.Position.X == 0 ||
                    item.Position.X == width - 1 ||
                    item.Position.Y == 0 ||
                    item.Position.Y == height - 1);

                if (!touchesBorder)
                {
                    patternsCleaned.Add(pattern.Key, pattern.Value);
                }
            }

            return patternsCleaned;
        }

        /// <summary>
        /// Collects the non-zero labels already stored on the board in the window from
        /// one pixel before to two pixels after <paramref name="pix"/> in both directions,
        /// excluding the last row and last column of the image.
        /// </summary>
        private IEnumerable<int> GetNeighboringLabels(Pixel pix)
        {
            var neighboringLabels = new List<int>();

            for (int i = pix.Position.Y - 1; i <= pix.Position.Y + 2 && i < _input.Height - 1; i++)
            {
                for (int j = pix.Position.X - 1; j <= pix.Position.X + 2 && j < _input.Width - 1; j++)
                {
                    if (i > -1 && j > -1 && _board[j, i] != 0)
                    {
                        neighboringLabels.Add(_board[j, i]);
                    }
                }
            }

            return neighboringLabels;
        }

        /// <summary>
        /// Groups every labeled board position by the root of its label.
        /// Each resulting pixel is recorded as black.
        /// </summary>
        private Dictionary<int, List<Pixel>> AggregatePatterns(Dictionary<int, Label> allLabels)
        {
            var patterns = new Dictionary<int, List<Pixel>>();

            for (int i = 0; i < _input.Height; i++)
            {
                for (int j = 0; j < _input.Width; j++)
                {
                    int patternNumber = _board[j, i];
                    if (patternNumber == 0)
                    {
                        continue;
                    }

                    patternNumber = allLabels[patternNumber].GetRoot().Name;

                    List<Pixel> pixels;
                    if (!patterns.TryGetValue(patternNumber, out pixels))
                    {
                        pixels = new List<Pixel>();
                        patterns[patternNumber] = pixels;
                    }

                    pixels.Add(new Pixel(new Point(j, i), Color.Black));
                }
            }

            return patterns;
        }

        /// <summary>
        /// Creates a white bitmap just large enough for <paramref name="pattern"/> and
        /// writes each pattern pixel into it, offset so the pattern starts at (0, 0).
        /// </summary>
        private unsafe Bitmap CreateBitmap(List<Pixel> pattern)
        {
            int minX = pattern.Min(p => p.Position.X);
            int maxX = pattern.Max(p => p.Position.X);

            int minY = pattern.Min(p => p.Position.Y);
            int maxY = pattern.Max(p => p.Position.Y);

            int width = maxX + 1 - minX;
            int height = maxY + 1 - minY;

            Bitmap bmp = DrawFilledRectangle(width, height);

            BitmapData imageData = bmp.LockBits(
                new Rectangle(0, 0, bmp.Width, bmp.Height),
                ImageLockMode.ReadWrite,
                PixelFormat.Format24bppRgb);
            try
            {
                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                foreach (Pixel pix in pattern)
                {
                    // Three bytes per pixel, written as blue, green, red.
                    int offset = ((pix.Position.X - minX) * 3) + (pix.Position.Y - minY) * stride;
                    scan0[offset] = pix.color.B;
                    scan0[offset + 1] = pix.color.G;
                    scan0[offset + 2] = pix.color.R;
                }
            }
            finally
            {
                bmp.UnlockBits(imageData);
            }

            return bmp;
        }

        /// <summary>
        /// Creates a bitmap of the given size filled with white.
        /// </summary>
        /// <param name="x">Bitmap width.</param>
        /// <param name="y">Bitmap height.</param>
        private Bitmap DrawFilledRectangle(int x, int y)
        {
            Bitmap bmp = new Bitmap(x, y);
            using (Graphics graph = Graphics.FromImage(bmp))
            {
                Rectangle imageSize = new Rectangle(0, 0, x, y);
                graph.FillRectangle(Brushes.White, imageSize);
            }

            return bmp;
        }

        /// <summary>
        /// Returns the rectangle spanned by the pattern's minimum and maximum coordinates.
        /// </summary>
        /// <remarks>
        /// Width and height are <c>max - min</c>, which is one less than the size
        /// <see cref="CreateBitmap"/> uses for the same pattern. This is kept as-is because
        /// callers position characters using these bounds.
        /// </remarks>
        private Rectangle GetBounds(List<Pixel> pattern)
        {
            var points = pattern.Select(x => x.Position);

            var x_query = points.Select(p => p.X);
            int xmin = x_query.Min();
            int xmax = x_query.Max();

            var y_query = points.Select(p => p.Y);
            int ymin = y_query.Min();
            int ymax = y_query.Max();

            return new Rectangle(xmin, ymin, xmax - xmin, ymax - ymin);
        }
    }
}

B

请注意我的绘图代码有点问题,这就是为什么5在右边被截断的原因,但是这段代码产生了以下输出:

Output

请注意B和5的旋转幅度超出预期,这是由于它们的曲率造成的。

/// <summary>
/// Estimates the rotation of a character from the line joining its bottom-left
/// pixel to its right-bottom pixel.
/// </summary>
/// <param name="pattern">The pixels that make up the character.</param>
/// <returns>The negated angle of that line, in degrees.</returns>
private double GetAngle(List<Pixel> pattern)
{
    Point[] positions = pattern.Select(p => p.Position).ToArray();

    // Lowest pixel; the leftmost one wins when several share the same row.
    Point bottomLeft =
        (from p in positions
         orderby p.Y descending, p.X
         select p).First();

    // Rightmost pixel; the lowest one wins when several share the same column.
    Point rightBottom =
        (from p in positions
         orderby p.X descending, p.Y descending
         select p).First();

    int deltaX = rightBottom.X - bottomLeft.X;
    int deltaY = rightBottom.Y - bottomLeft.Y;

    double degrees = Math.Atan2(deltaY, deltaX) * 180 / Math.PI;
    return -degrees;
}

使用以下代码,从左右边缘获取角度,然后选择最佳角度,旋转似乎更好。注意我只用需要顺时针旋转的字母测试它,所以如果它们需要相反的方式它可能不会很好。

这段代码还把像素划分成“象限”,这样每个参考像素都从各自的象限中选取,避免选到两个相距太近的像素。

选择最佳角度的思路是:如果两个角度相近(目前的阈值是彼此相差不超过1.5度,可以很容易地调整),就取它们的平均值;否则,选择最接近零的那个。

5

现在生成以下输出,我的绘图代码仍然略有问题。请注意C看起来并没有被很好地纠正,但仔细观察会发现,这只是它的形状造成的。

Output

我改进了绘图代码,并尝试将字符放在同一基线上:

5

然后产生以下输出。请注意,各个字符并不在完全相同的基线上,因为基线是用旋转前的底部位置计算出来的。代码需要改进为使用旋转后的基线。在计算基线之前先对图像进行阈值处理也会有所帮助。

另一个改进是计算每个旋转后字符位置的右边缘(Right),这样在绘制下一个字符时,就不会与前一个字符重叠并把它切掉一部分。正如您在输出中看到的,相邻的字符之间有轻微的切入。

/// <summary>
/// Two edge angles that differ by at most this many degrees are treated as
/// agreeing and are averaged.
/// </summary>
private const double SimilarAngleToleranceDegrees = 1.5;

/// <summary>
/// Estimates a character's rotation from its left and right edges.
/// </summary>
/// <param name="pattern">The pixels that make up the character.</param>
/// <param name="bounds">The character's bounds; its center splits the pixels into quadrants.</param>
/// <returns>
/// The average of the two edge angles when they agree within the tolerance, otherwise
/// the edge angle closest to zero. When only one edge could be measured that edge's
/// angle is returned, and 0 when neither could.
/// </returns>
private double GetAngle(List<Pixel> pattern, Rectangle bounds)
{
    int halfWidth = bounds.X + (bounds.Width / 2);
    int halfHeight = bounds.Y + (bounds.Height / 2);

    double leftEdgeAngle = GetAngleLeftEdge(pattern, halfWidth, halfHeight);
    double rightEdgeAngle = GetAngleRightEdge(pattern, halfWidth, halfHeight);

    // An edge reports NaN when one of its quadrants holds no pixels.
    bool hasLeft = !double.IsNaN(leftEdgeAngle);
    bool hasRight = !double.IsNaN(rightEdgeAngle);

    if (!hasLeft && !hasRight)
    {
        return 0d;
    }

    if (!hasLeft)
    {
        return rightEdgeAngle;
    }

    if (!hasRight)
    {
        return leftEdgeAngle;
    }

    if (Math.Abs(leftEdgeAngle - rightEdgeAngle) <= SimilarAngleToleranceDegrees)
    {
        return (leftEdgeAngle + rightEdgeAngle) / 2d;
    }

    return Math.Abs(leftEdgeAngle) > Math.Abs(rightEdgeAngle)
        ? rightEdgeAngle
        : leftEdgeAngle;
}

/// <summary>
/// Measures the angle of the line from the leftmost pixel of the top-left quadrant
/// to the lowest pixel of the bottom-left quadrant.
/// </summary>
/// <param name="pattern">The pixels that make up the character.</param>
/// <param name="halfWidth">X coordinate separating the left and right quadrants.</param>
/// <param name="halfHeight">Y coordinate separating the top and bottom quadrants.</param>
/// <returns>
/// 90 minus the line's angle in degrees, or <see cref="double.NaN"/> when either
/// quadrant contains no pixels.
/// </returns>
private double GetAngleLeftEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    var topLeftPixels = pattern.Select(p => p.Position).Where(p => p.Y < halfHeight && p.X < halfWidth).ToArray();
    var bottomLeftPixels = pattern.Select(p => p.Position).Where(p => p.Y > halfHeight && p.X < halfWidth).ToArray();

    // First() would throw on an empty quadrant, so report "no measurement" instead.
    if (topLeftPixels.Length == 0 || bottomLeftPixels.Length == 0)
    {
        return double.NaN;
    }

    Point topLeft = topLeftPixels.OrderBy(p => p.X).ThenBy(p => p.Y).First();
    Point bottomLeft = bottomLeftPixels.OrderByDescending(p => p.Y).ThenBy(p => p.X).First();

    int xDiff = bottomLeft.X - topLeft.X;
    int yDiff = bottomLeft.Y - topLeft.Y;

    double angle = Math.Atan2(yDiff, xDiff) * 180 / Math.PI;

    return 90 - angle;
}

/// <summary>
/// Measures the angle of the line from the highest pixel of the top-right quadrant
/// to the rightmost pixel of the bottom-right quadrant.
/// </summary>
/// <param name="pattern">The pixels that make up the character.</param>
/// <param name="halfWidth">X coordinate separating the left and right quadrants.</param>
/// <param name="halfHeight">Y coordinate separating the top and bottom quadrants.</param>
/// <returns>
/// The absolute angle in degrees, or <see cref="double.NaN"/> when either quadrant
/// contains no pixels. Unlike the left edge, the sign of the angle is discarded.
/// </returns>
private double GetAngleRightEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    var topRightPixels = pattern.Select(p => p.Position).Where(p => p.Y < halfHeight && p.X > halfWidth).ToArray();
    var bottomRightPixels = pattern.Select(p => p.Position).Where(p => p.Y > halfHeight && p.X > halfWidth).ToArray();

    // First() would throw on an empty quadrant, so report "no measurement" instead.
    if (topRightPixels.Length == 0 || bottomRightPixels.Length == 0)
    {
        return double.NaN;
    }

    Point topRight = topRightPixels.OrderBy(p => p.Y).ThenByDescending(p => p.X).First();
    Point bottomRight = bottomRightPixels.OrderByDescending(p => p.X).ThenByDescending(p => p.Y).First();

    int xDiff = bottomRight.X - topRight.X;
    int yDiff = bottomRight.Y - topRight.Y;

    // Arguments are (x, y) here, so the angle is measured from the vertical axis.
    double angle = Math.Atan2(xDiff, yDiff) * 180 / Math.PI;

    return Math.Abs(angle);
}

/// <summary>
/// Draws every detected character, rotated by its stored angle, onto a new white
/// bitmap with the same size, pixel format and resolution as <paramref name="bitmap"/>.
/// Characters are shifted vertically towards the lowest character bottom found.
/// </summary>
/// <param name="bitmap">The source image; it is not drawn on.</param>
/// <returns>A new bitmap. It is blank white when no characters were detected.</returns>
/// <remarks>
/// This version expects <c>CCL.Process</c> to return a bitmap together with an angle for
/// each character, presumably from a revised CCL that calls GetAngle (verify against it).
/// </remarks>
private static Bitmap DeskewImageByIndividualChars(Bitmap bitmap)
{
    IDictionary<Rectangle, Tuple<Bitmap, double>> characters = new CCL().Process(bitmap);

    Bitmap deskewedBitmap = new Bitmap(bitmap.Width, bitmap.Height, bitmap.PixelFormat);
    deskewedBitmap.SetResolution(bitmap.HorizontalResolution, bitmap.VerticalResolution);

    using (Graphics g = Graphics.FromImage(deskewedBitmap))
    {
        g.FillRectangle(Brushes.White, new Rectangle(Point.Empty, deskewedBitmap.Size));

        // Max() throws on an empty sequence; with no characters there is nothing to draw.
        if (characters.Count == 0)
        {
            return deskewedBitmap;
        }

        int baseLine = characters.Max(c => c.Key.Bottom);

        foreach (var character in characters)
        {
            int y = character.Key.Y;
            if (character.Key.Bottom != baseLine)
            {
                y += (baseLine - character.Key.Bottom - 1);
            }

            // The source character bitmaps are local to this method, so release them once drawn.
            using (Bitmap sourceBitmap = character.Value.Item1)
            using (Bitmap characterBitmap = RotateImage(sourceBitmap, character.Value.Item2, Color.White))
            {
                g.DrawImage(characterBitmap, new Point(character.Key.X, y));
            }
        }
    }

    return deskewedBitmap;
}

输出现在非常类似于OP中手动创建的输出。

Output