c# - 如何将图像的各个字母旋转到正确的方向以获得最佳的OCR？

在my previous question中，我转换了此图像:

到这个:

Tesseract OCR将其解释为:

1O351

在图像周围放置框架

c# - 如何将图像的各个字母旋转到正确的方向以获得最佳的OCR？-LMLPHP

实际上改善了OCR结果。

 1CB51

但是，我需要全部5个字符才能正确进行OCR，因此，作为一个实验，我使用Paint.NET旋转每个字母并将其对齐为正确的方向:

得出正确的答案:

1CB52

我将如何在C#中执行此更正？

我已经研究过各种文本对齐算法，但它们都假设源图像中存在成行的文本，可以据此推算出旋转角度，并且这些文本行中的字母之间已经具有正确的间距和方向关系。

最佳答案

您可以使用以下code project article中的代码来分割每个单独的字符。但是，当尝试分别对这些字符进行偏移校正时，您得到的任何结果都不会很好，因为没有太多的信息可利用。

我尝试使用AForge.NET的HoughLineTransformation类，得到的角度在80-90度范围内。因此，我尝试使用以下代码对它们进行去歪斜:

/// <summary>
/// Segments <paramref name="targetBitmap"/> into connected components with <c>CCL</c>, estimates a
/// rotation angle for each component with a Hough line transform, and draws the rotated component
/// back onto <paramref name="targetBitmap"/> at the component's original location.
/// </summary>
/// <param name="targetBitmap">Image to process. It is drawn on in place.</param>
/// <returns>The same <paramref name="targetBitmap"/> instance that was passed in.</returns>
private static Bitmap DeskewImageByIndividualChars(Bitmap targetBitmap)
{
    IDictionary<Rectangle, Bitmap> characters = new CCL().Process(targetBitmap);

    try
    {
        using (Graphics g = Graphics.FromImage(targetBitmap))
        {
            foreach (var character in characters)
            {
                double? angle = null;

                BitmapData bitmapData = character.Value.LockBits(new Rectangle(Point.Empty, character.Value.Size), ImageLockMode.ReadWrite, PixelFormat.Format8bppIndexed);
                try
                {
                    HoughLineTransformation hlt = new HoughLineTransformation();
                    hlt.ProcessImage(bitmapData);

                    var lines = hlt.GetLinesByRelativeIntensity(0.5);

                    // Average() throws InvalidOperationException on an empty sequence, so only
                    // compute an angle when at least one line was detected.
                    if (lines.Any())
                    {
                        angle = lines.Average(l => l.Theta);
                    }
                }
                finally
                {
                    character.Value.UnlockBits(bitmapData);
                }

                if (!angle.HasValue)
                {
                    // No line found for this component: leave its pixels untouched.
                    continue;
                }

                using (Bitmap bitmap = RotateImage(character.Value, 90 - angle.Value, Color.White))
                {
                    g.DrawImage(bitmap, character.Key.Location);
                }
            }
        }
    }
    finally
    {
        // The per-character bitmaps are only used inside this method; release their GDI+ handles.
        foreach (Bitmap characterBitmap in characters.Values)
        {
            characterBitmap.Dispose();
        }
    }

    return targetBitmap;
}

其中用到的RotateImage方法取自此处。但是，结果似乎并不理想，也许您可以尝试进一步改进。

以下是那篇Code Project文章中的代码，供您参考。我对其做了一些改动，使其更加安全，例如用try-finally包裹LockBits调用，并使用using语句正确释放对象等。

using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;

namespace ConnectedComponentLabeling
{
    /// <summary>
    /// Connected-component labeling over a bitmap. Every non-background pixel is assigned a label,
    /// labels of touching pixels are merged, and each resulting component is returned as its own
    /// bitmap together with its bounding rectangle.
    /// </summary>
    public class CCL
    {
        private Bitmap _input;

        // Label per pixel, indexed [x, y]; 0 means "no label" (background or not yet visited).
        private int[,] _board;

        /// <summary>
        /// Splits <paramref name="input"/> into its connected components.
        /// </summary>
        /// <param name="input">Image to segment; it is read but not drawn on.</param>
        /// <returns>
        /// One entry per component that does not touch the image border: the key is the component's
        /// bounds (see <see cref="GetBounds"/>), the value a new bitmap the caller should dispose.
        /// </returns>
        public IDictionary<Rectangle, Bitmap> Process(Bitmap input)
        {
            _input = input;
            _board = new int[_input.Width, _input.Height];

            Dictionary<int, List<Pixel>> patterns = Find();
            var images = new Dictionary<Rectangle, Bitmap>();

            foreach (KeyValuePair<int, List<Pixel>> pattern in patterns)
            {
                Rectangle bounds = GetBounds(pattern.Value);
                Bitmap bmp = CreateBitmap(pattern.Value);

                try
                {
                    // Ownership of bmp passes to the returned dictionary; no intermediate clone needed.
                    images.Add(bounds, bmp);
                }
                catch
                {
                    // Add throws if two components share identical bounds; do not leak the bitmap.
                    bmp.Dispose();
                    throw;
                }
            }

            return images;
        }

        /// <summary>
        /// Decides whether a pixel belongs to the background. The default treats fully opaque
        /// pure white as background; override to use a different rule.
        /// </summary>
        protected virtual bool CheckIsBackGround(Pixel currentPixel)
        {
            return currentPixel.color.A == 255 && currentPixel.color.R == 255 && currentPixel.color.G == 255 && currentPixel.color.B == 255;
        }

        /// <summary>
        /// Scans the input row by row, labels every non-background pixel in <c>_board</c>, merges
        /// labels that meet, and returns the pixels grouped by root label with border-touching
        /// groups removed.
        /// </summary>
        private unsafe Dictionary<int, List<Pixel>> Find()
        {
            int width = _input.Width;
            int height = _input.Height;

            int labelCount = 1;
            var allLabels = new Dictionary<int, Label>();

            BitmapData imageData = _input.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format24bppRgb);
            try
            {
                // Locked as Format24bppRgb: 3 bytes per pixel, read below in B, G, R order.
                const int bytesPerPixel = 3;

                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                for (int i = 0; i < height; i++)
                {
                    byte* row = scan0 + (i * stride);

                    for (int j = 0; j < width; j++)
                    {
                        int bIndex = j * bytesPerPixel;
                        int gIndex = bIndex + 1;
                        int rIndex = bIndex + 2;

                        byte pixelR = row[rIndex];
                        byte pixelG = row[gIndex];
                        byte pixelB = row[bIndex];

                        Pixel currentPixel = new Pixel(new Point(j, i), Color.FromArgb(pixelR, pixelG, pixelB));

                        if (CheckIsBackGround(currentPixel))
                        {
                            continue;
                        }

                        IEnumerable<int> neighboringLabels = GetNeighboringLabels(currentPixel);
                        int currentLabel;

                        if (!neighboringLabels.Any())
                        {
                            // No labeled neighbor yet: start a new component.
                            currentLabel = labelCount;
                            allLabels.Add(currentLabel, new Label(currentLabel));
                            labelCount++;
                        }
                        else
                        {
                            // Adopt the smallest root label among the neighbors and join every
                            // neighbor whose root differs to it.
                            currentLabel = neighboringLabels.Min(n => allLabels[n].GetRoot().Name);
                            Label root = allLabels[currentLabel].GetRoot();

                            foreach (var neighbor in neighboringLabels)
                            {
                                if (root.Name != allLabels[neighbor].GetRoot().Name)
                                {
                                    allLabels[neighbor].Join(allLabels[currentLabel]);
                                }
                            }
                        }

                        _board[j, i] = currentLabel;
                    }
                }
            }
            finally
            {
                _input.UnlockBits(imageData);
            }

            Dictionary<int, List<Pixel>> patterns = AggregatePatterns(allLabels);

            return RemoveIntrusions(patterns, width, height);
        }

        /// <summary>
        /// Drops every pattern that has at least one pixel on the outermost row or column of a
        /// <paramref name="width"/> x <paramref name="height"/> image.
        /// </summary>
        private Dictionary<int, List<Pixel>> RemoveIntrusions(Dictionary<int, List<Pixel>> patterns, int width, int height)
        {
            var patternsCleaned = new Dictionary<int, List<Pixel>>();

            foreach (var pattern in patterns)
            {
                // The right-border check compares X (not Y) against width - 1.
                bool touchesBorder = pattern.Value.Any(p =>
                    p.Position.X == 0 ||
                    p.Position.X == width - 1 ||
                    p.Position.Y == 0 ||
                    p.Position.Y == height - 1);

                if (!touchesBorder)
                {
                    patternsCleaned.Add(pattern.Key, pattern.Value);
                }
            }

            return patternsCleaned;
        }

        /// <summary>
        /// Collects the non-zero labels already stored in <c>_board</c> around <paramref name="pix"/>.
        /// The window spans rows Y-1..Y+2 and columns X-1..X+2, clipped to the board.
        /// </summary>
        private IEnumerable<int> GetNeighboringLabels(Pixel pix)
        {
            var neighboringLabels = new List<int>();

            // _board was allocated as [Width, Height], so its dimensions are the valid index ranges.
            int boardWidth = _board.GetLength(0);
            int boardHeight = _board.GetLength(1);

            // NOTE(review): the window reaches two cells right/down rather than one; kept as in the
            // original, presumably to tolerate one-pixel gaps - confirm before narrowing it.
            for (int i = pix.Position.Y - 1; i <= pix.Position.Y + 2 && i < boardHeight; i++)
            {
                for (int j = pix.Position.X - 1; j <= pix.Position.X + 2 && j < boardWidth; j++)
                {
                    if (i > -1 && j > -1 && _board[j, i] != 0)
                    {
                        neighboringLabels.Add(_board[j, i]);
                    }
                }
            }

            return neighboringLabels;
        }

        /// <summary>
        /// Groups every labeled board cell under the name of its root label. Each pixel is
        /// recorded with <see cref="Color.Black"/>, not with its source color.
        /// </summary>
        private Dictionary<int, List<Pixel>> AggregatePatterns(Dictionary<int, Label> allLabels)
        {
            var patterns = new Dictionary<int, List<Pixel>>();

            int boardWidth = _board.GetLength(0);
            int boardHeight = _board.GetLength(1);

            for (int i = 0; i < boardHeight; i++)
            {
                for (int j = 0; j < boardWidth; j++)
                {
                    int patternNumber = _board[j, i];
                    if (patternNumber == 0)
                    {
                        continue;
                    }

                    patternNumber = allLabels[patternNumber].GetRoot().Name;

                    List<Pixel> pixels;
                    if (!patterns.TryGetValue(patternNumber, out pixels))
                    {
                        pixels = new List<Pixel>();
                        patterns[patternNumber] = pixels;
                    }

                    pixels.Add(new Pixel(new Point(j, i), Color.Black));
                }
            }

            return patterns;
        }

        /// <summary>
        /// Renders <paramref name="pattern"/> onto a new white bitmap sized to the pattern's pixel
        /// extent, with the pattern's top-left pixel at (0, 0).
        /// </summary>
        private unsafe Bitmap CreateBitmap(List<Pixel> pattern)
        {
            int minX = pattern.Min(p => p.Position.X);
            int maxX = pattern.Max(p => p.Position.X);

            int minY = pattern.Min(p => p.Position.Y);
            int maxY = pattern.Max(p => p.Position.Y);

            int width = maxX + 1 - minX;
            int height = maxY + 1 - minY;

            Bitmap bmp = DrawFilledRectangle(width, height);

            BitmapData imageData = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
            try
            {
                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                foreach (Pixel pix in pattern)
                {
                    // Byte offset of the pixel's blue channel; green and red follow it.
                    int offset = ((pix.Position.X - minX) * 3) + (pix.Position.Y - minY) * stride;

                    scan0[offset] = pix.color.B;
                    scan0[offset + 1] = pix.color.G;
                    scan0[offset + 2] = pix.color.R;
                }
            }
            finally
            {
                bmp.UnlockBits(imageData);
            }

            return bmp;
        }

        /// <summary>
        /// Creates a new bitmap of <paramref name="x"/> by <paramref name="y"/> pixels filled with white.
        /// </summary>
        private Bitmap DrawFilledRectangle(int x, int y)
        {
            Bitmap bmp = new Bitmap(x, y);
            using (Graphics graph = Graphics.FromImage(bmp))
            {
                graph.FillRectangle(Brushes.White, new Rectangle(0, 0, x, y));
            }

            return bmp;
        }

        /// <summary>
        /// Returns the rectangle spanned by the pattern's minimum and maximum coordinates.
        /// </summary>
        /// <remarks>
        /// Width and height are max - min, i.e. one less than the pixel extent used by
        /// <see cref="CreateBitmap"/>. Kept as-is because callers consume these rectangles.
        /// </remarks>
        private Rectangle GetBounds(List<Pixel> pattern)
        {
            var points = pattern.Select(x => x.Position);

            var x_query = points.Select(p => p.X);
            int xmin = x_query.Min();
            int xmax = x_query.Max();

            var y_query = points.Select(p => p.Y);
            int ymin = y_query.Min();
            int ymax = y_query.Max();

            return new Rectangle(xmin, ymin, xmax - xmin, ymax - ymin);
        }
    }
}

通过上面的代码，我得到了以下输入/输出:

如您所见，B旋转得非常好，但其他旋转效果不佳。

除了尝试对单个字符进行去歪斜之外，另一种方法是使用上面的分割例程找到各个字符的位置，然后将每个字符分别传递给您的识别引擎，看看这样能否改善结果。

我使用以下方法，利用CCL类内部的List<Pixel>来计算字符的角度。它通过计算“左下”点与“右下”点之间的角度来工作。如果字符朝另一个方向旋转，我还没有测试它是否有效。

/// <summary>
/// Estimates a pattern's rotation from the line between its bottom-left and right-bottom pixels.
/// </summary>
/// <param name="pattern">Pixels of one component; must not be empty.</param>
/// <returns>The negated angle of that line, in degrees.</returns>
private double GetAngle(List<Pixel> pattern)
{
    var positions = pattern.Select(px => px.Position).ToArray();

    // Lowest pixel (largest Y); ties go to the smallest X.
    Point bottomLeft = positions
        .OrderByDescending(pt => pt.Y)
        .ThenBy(pt => pt.X)
        .First();

    // Right-most pixel (largest X); ties go to the largest Y.
    Point rightBottom = positions
        .OrderByDescending(pt => pt.X)
        .ThenByDescending(pt => pt.Y)
        .First();

    int deltaX = rightBottom.X - bottomLeft.X;
    int deltaY = rightBottom.Y - bottomLeft.Y;

    double degrees = Math.Atan2(deltaY, deltaX) * 180 / Math.PI;

    return -degrees;
}

请注意，我的绘图代码有些破损，因此这就是5在右侧被切除的原因，但是此代码会产生以下输出:

请注意，由于曲率，B和5旋转得超出您的预期。

通过使用以下代码，从左右边缘获取角度，然后选择最佳角度，旋转似乎更好。请注意，我只用需要顺时针旋转的字母进行了测试，因此，如果需要以相反的方式旋转，可能效果不太好。

这段代码还会把像素划分到四个象限中，使每个点都取自各自的象限，以免选到两个距离太近的像素。

选择最佳角度的思路是：如果两个角度相近（目前的阈值是相差不超过1.5度，可以很容易地调整），就取它们的平均值；否则，选择最接近零的那个。

/// <summary>
/// Combines the angle estimates taken from the pattern's left and right edges.
/// </summary>
/// <param name="pattern">Pixels of one component.</param>
/// <param name="bounds">Bounding rectangle of the pattern; its centre splits the pixels into quadrants.</param>
/// <returns>
/// The mean of both estimates when they differ by at most 1.5 degrees; otherwise the estimate
/// with the smaller absolute value (the left one on a tie).
/// </returns>
private double GetAngle(List<Pixel> pattern, Rectangle bounds)
{
    int centreX = bounds.X + (bounds.Width / 2);
    int centreY = bounds.Y + (bounds.Height / 2);

    double fromLeft = GetAngleLeftEdge(pattern, centreX, centreY);
    double fromRight = GetAngleRightEdge(pattern, centreX, centreY);

    bool estimatesAgree = Math.Abs(fromLeft - fromRight) <= 1.5;
    if (estimatesAgree)
    {
        return (fromLeft + fromRight) / 2d;
    }

    // The estimates disagree: prefer the one closer to zero.
    return Math.Abs(fromLeft) > Math.Abs(fromRight) ? fromRight : fromLeft;
}

/// <summary>
/// Estimates the rotation from the pattern's left edge, using one pixel from the top-left
/// quadrant and one from the bottom-left quadrant.
/// </summary>
/// <param name="pattern">Pixels of one component.</param>
/// <param name="halfWidth">X coordinate separating the left and right halves.</param>
/// <param name="halfHeight">Y coordinate separating the top and bottom halves.</param>
/// <returns>90 minus the angle, in degrees, of the line from the top-left to the bottom-left pixel.</returns>
private double GetAngleLeftEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    // Pixels lying exactly on either dividing line belong to no quadrant.
    var leftHalf = pattern
        .Select(px => px.Position)
        .Where(pt => pt.X < halfWidth)
        .ToArray();

    var upperQuadrant = leftHalf.Where(pt => pt.Y < halfHeight).ToArray();
    var lowerQuadrant = leftHalf.Where(pt => pt.Y > halfHeight).ToArray();

    // Left-most pixel of the upper quadrant; ties go to the smallest Y.
    Point topLeft = upperQuadrant
        .OrderBy(pt => pt.X)
        .ThenBy(pt => pt.Y)
        .First();

    // Lowest pixel of the lower quadrant; ties go to the smallest X.
    Point bottomLeft = lowerQuadrant
        .OrderByDescending(pt => pt.Y)
        .ThenBy(pt => pt.X)
        .First();

    int deltaX = bottomLeft.X - topLeft.X;
    int deltaY = bottomLeft.Y - topLeft.Y;

    double degrees = Math.Atan2(deltaY, deltaX) * 180 / Math.PI;

    return 90 - degrees;
}

/// <summary>
/// Estimates the rotation from the pattern's right edge, using one pixel from the top-right
/// quadrant and one from the bottom-right quadrant.
/// </summary>
/// <param name="pattern">Pixels of one component.</param>
/// <param name="halfWidth">X coordinate separating the left and right halves.</param>
/// <param name="halfHeight">Y coordinate separating the top and bottom halves.</param>
/// <returns>
/// The absolute value, in degrees, of Atan2(xDiff, yDiff) for the line from the top-right to
/// the bottom-right pixel.
/// </returns>
private double GetAngleRightEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    // Pixels lying exactly on either dividing line belong to no quadrant.
    var rightHalf = pattern
        .Select(px => px.Position)
        .Where(pt => pt.X > halfWidth)
        .ToArray();

    var upperQuadrant = rightHalf.Where(pt => pt.Y < halfHeight).ToArray();
    var lowerQuadrant = rightHalf.Where(pt => pt.Y > halfHeight).ToArray();

    // Highest pixel of the upper quadrant; ties go to the largest X.
    Point topRight = upperQuadrant
        .OrderBy(pt => pt.Y)
        .ThenByDescending(pt => pt.X)
        .First();

    // Right-most pixel of the lower quadrant; ties go to the largest Y.
    Point bottomRight = lowerQuadrant
        .OrderByDescending(pt => pt.X)
        .ThenByDescending(pt => pt.Y)
        .First();

    int deltaX = bottomRight.X - topRight.X;
    int deltaY = bottomRight.Y - topRight.Y;

    // Arguments are (x, y) here, so the angle is measured from the vertical axis.
    double degrees = Math.Atan2(deltaX, deltaY) * 180 / Math.PI;

    return Math.Abs(degrees);
}

现在这将产生以下输出，同样，我的图形代码也被稍微破坏了。请注意，C似乎没有很好的去歪斜，但是仔细观察，正是这种形状导致了这种情况的发生。

我改进了绘图代码，还尝试将字符放到相同的基线上:

/// <summary>
/// Builds a new white image of the same size as <paramref name="bitmap"/> and draws every
/// segmented character onto it, rotated by its stored angle and shifted down towards a common
/// baseline.
/// </summary>
/// <param name="bitmap">Source image; it is not drawn on.</param>
/// <returns>
/// A new bitmap the caller should dispose. If segmentation yields no characters the result is
/// entirely white.
/// </returns>
private static Bitmap DeskewImageByIndividualChars(Bitmap bitmap)
{
    IDictionary<Rectangle, Tuple<Bitmap, double>> characters = new CCL().Process(bitmap);

    try
    {
        Bitmap deskewedBitmap = new Bitmap(bitmap.Width, bitmap.Height, bitmap.PixelFormat);
        try
        {
            deskewedBitmap.SetResolution(bitmap.HorizontalResolution, bitmap.VerticalResolution);

            using (Graphics g = Graphics.FromImage(deskewedBitmap))
            {
                g.FillRectangle(Brushes.White, new Rectangle(Point.Empty, deskewedBitmap.Size));

                // Max() throws on an empty sequence, so skip placement when nothing was found.
                if (characters.Count > 0)
                {
                    // The baseline is the lowest pre-rotation bottom edge among all characters.
                    int baseLine = characters.Max(c => c.Key.Bottom);

                    foreach (var character in characters)
                    {
                        int y = character.Key.Y;
                        if (character.Key.Bottom != baseLine)
                        {
                            y += (baseLine - character.Key.Bottom - 1);
                        }

                        using (Bitmap characterBitmap = RotateImage(character.Value.Item1, character.Value.Item2, Color.White))
                        {
                            g.DrawImage(characterBitmap, new Point(character.Key.X, y));
                        }
                    }
                }
            }
        }
        catch
        {
            // Do not leak the half-built output image when drawing fails.
            deskewedBitmap.Dispose();
            throw;
        }

        return deskewedBitmap;
    }
    finally
    {
        // The per-character bitmaps are only used inside this method; release their GDI+ handles.
        foreach (Tuple<Bitmap, double> character in characters.Values)
        {
            character.Item1.Dispose();
        }
    }
}

然后产生以下输出。请注意，由于基线是用旋转前的底边计算的，各个字符的底部并不完全对齐。要改进这一点，需要改用旋转后的基线。在计算基线之前先对图像进行阈值处理也会有所帮助。

另一个改进是计算每个旋转后字符所在位置的Right，这样在绘制下一个字符时，就不会与前一个字符重叠并切掉其中一部分。正如您在输出中看到的，2稍微切入了5。

现在的输出与OP中手动创建的输出非常相似。

关于c# - 如何将图像的各个字母旋转到正确的方向以获得最佳的OCR？，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/41839477/