从图像中删除线条

时间:2016-01-23 04:16:47

标签: c++ opencv image-processing

我是OpenCV的初学者,我需要删除图像中的水平线和垂直线,以便只保留文本(这些线条在用OCR提取文本时会造成麻烦)。我正在尝试从营养成分表中提取文本。有人可以帮助我吗?

Nutrient Fact Table

1 个答案:

答案 0 :(得分:3)

这是一个有趣的问题,所以我试了一下。下面我将向您展示如何提取和删除水平和垂直线。你可以从中推断出来。此外,为了节省时间,我没有预先处理你的图像来裁剪背景,这是一个改进的途径。

结果:result 代码(编辑:添加垂直线):

#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main(int, char** argv)
{
    // Load the image
    Mat src = imread(argv[1]);
    // Check if image is loaded fine
    if(!src.data)
        cerr << "Problem loading image!!!" << endl;
    Mat gray;
    if (src.channels() == 3)
    {
        cvtColor(src, gray, CV_BGR2GRAY);
    }
    else
    {
        gray = src;
    }

    //inverse binary img
    Mat bw;
    //this will hold the result, image to be passed to OCR
    Mat fin;
    //I find OTSU binarization best for text.
    //Would perform better if background had been cropped out
    threshold(gray, bw, 0, 255, THRESH_BINARY_INV | THRESH_OTSU);
    threshold(gray, fin, 0, 255, THRESH_BINARY | THRESH_OTSU);
    imshow("binary", bw);
    Mat dst;
    Canny( fin, dst, 50, 200, 3 );
    Mat str = getStructuringElement(MORPH_RECT, Size(3,3));
    dilate(dst, dst, str, Point(-1, -1), 3);
    imshow("dilated_canny", dst);
    //bitwise_and w/ canny image helps w/ background noise
    bitwise_and(bw, dst, dst);
    imshow("and", dst);
    Mat horizontal = dst.clone();
    Mat vertical = dst.clone();
    fin = ~dst;

    //Image that will be horizontal lines
    Mat horizontal = bw.clone();
    //Selected this value arbitrarily
    int horizontalsize = horizontal.cols / 30;
    Mat horizontalStructure = getStructuringElement(MORPH_RECT, Size(horizontalsize,1));
    erode(horizontal, horizontal, horizontalStructure, Point(-1, -1));
    dilate(horizontal, horizontal, horizontalStructure, Point(-1, -1), 1);
    imshow("horizontal_lines", horizontal);

    //Need to find horizontal contours, so as to not damage letters
    vector<Vec4i> hierarchy;
    vector<vector<Point> >contours;
    findContours(horizontal, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
    for (const auto& c : contours)
    {
        Rect r = boundingRect(c);

        float percentage_height = (float)r.height / (float)src.rows;
        float percentage_width = (float)r.width / (float)src.cols;

        //These exclude contours that probably are not dividing lines
        if (percentage_height > 0.05)
            continue;

        if (percentage_width < 0.50)
            continue;
        //fills in line with white rectange
        rectangle(fin, r, Scalar(255,255,255), CV_FILLED);
    }

    int verticalsize = vertical.rows / 30;
    Mat verticalStructure = getStructuringElement(MORPH_RECT, Size(1,verticalsize));
    erode(vertical, vertical, verticalStructure, Point(-1, -1));
    dilate(vertical, vertical, verticalStructure, Point(-1, -1), 1);
    imshow("verticalal", vertical); 

    findContours(vertical, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
    for (const auto& c : contours)
    {
        Rect r = boundingRect(c);

        float percentage_height = (float)r.height / (float)src.rows;
        float percentage_width = (float)r.width / (float)src.cols;

        //These exclude contours that probably are not dividing lines
        if (percentage_width > 0.05)
            continue;

        if (percentage_height < 0.50)
            continue;
        //fills in line with white rectange
        rectangle(fin, r, Scalar(255,255,255), CV_FILLED);
    }

    imshow("Result", fin);
    waitKey(0);
    return 0;
}

这种方法的局限性在于线条必须是笔直的。由于底部那条线略有弯曲,它会稍微切到"Energy"中的字母"E"。也许可以像建议的那样使用霍夫线检测(我从未用过),设计出类似但更稳健的方法。此外,用矩形填充线条可能并不是最好的做法。