Question

我正在尝试在OpenCV中使用KNN构建OCR。我进行了一些尝试，但结果与预期不同，我认为在检测部分出现了一些错误。第一个问题是无法识别字母，始终将其识别为数字（在检测到时）。此外，有时甚至连数字都无法识别，在这种情况下，它们经常被跳过。到目前为止，下面是我的代码。

训练：

void trainText() {
    Mat thr, gray, con;
    Mat src = imread("training_chars.png");
    cvtColor(src, gray, CV_BGR2GRAY);
    threshold(gray,thr,125,255,THRESH_BINARY_INV);
    imshow("text", thr);
    waitKey();
    thr.copyTo(con);

    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    Mat sample;
    Mat response_array;
    findContours( con, contours, hierarchy,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE );

    for(int i = 0; i<contours.size(); i=hierarchy[i][0]) {
        Rect r= boundingRect(contours[i]); //Find bounding rect for each contour
        rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,0,255),2,8,0);
        Mat ROI = thr(r); //Crop the image
        Mat tmp1, tmp2;
        resize(ROI,tmp1, Size(20,30), 0,0,INTER_LINEAR ); 
        tmp1.convertTo(tmp2,CV_32FC1); //convert to float

        imshow("src",src);

        int c=waitKey(0); // Read corresponding label for contour from keyoard
        c-=0x30;     // Convert ascii to integer value
        response_array.push_back(c); // Store label to a mat
        rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,255,0),2,8,0);
        sample.push_back(tmp2.reshape(1,1)); // Store  sample data
    }
    Mat response,tmp;
    tmp=response_array.reshape(1,1); //make continuous
    tmp.convertTo(response,CV_32FC1); // Convert  to float

    FileStorage Data("TrainingData.xml",FileStorage::WRITE); // Store the sample data in a file
    Data << "data" << sample;
    Data.release();

    FileStorage Label("LabelData.xml",FileStorage::WRITE); // Store the label data in a file
    Label << "label" << response;
    Label.release();
    cout<<"Training and Label data created successfully....!! "<<endl;

    imshow("src",src);
    waitKey(0);
}

测试：

/**
 * Recognises the characters in `image` with a KNN classifier.
 *
 * The classifier is trained from "TrainingData.xml" / "LabelData.xml". The
 * image is binarised, the bounding box of every top-level contour is
 * collected, and the boxes are classified from left to right so the returned
 * text follows reading order.
 *
 * @param image BGR input image; bounding boxes are drawn onto it.
 * @return The recognised characters, each preceded by a single space.
 */
string getText (Mat image) {
    Mat thr1,gray1,con1;
    Mat src1 = image.clone();
    cvtColor(src1,gray1,CV_BGR2GRAY);
    threshold(gray1,thr1,125,255,THRESH_BINARY_INV);
    thr1.copyTo(con1); // contours are searched on a copy; thr1 is kept for cropping

    Mat sample1;
    Mat response1,tmp1;
    FileStorage Data1("TrainingData.xml",FileStorage::READ);
    Data1["data"] >> sample1;
    Data1.release();

    FileStorage Label1("LabelData.xml",FileStorage::READ); // Read label data to a Mat
    Label1["label"] >> response1;
    Label1.release();

    Ptr<ml::KNearest> knn(ml::KNearest::create());

    knn->train(sample1, ml::ROW_SAMPLE,response1); // Train with sample and responses
    cout<< "Training compleated.....!!" <<endl;

    vector<vector<Point>> contours1; // Vector for storing contour
    vector<Vec4i> hierarchy1;

    // Create input samples by contour finding and cropping
    findContours(con1, contours1, hierarchy1, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);

    // Walk the same-level contours first and keep only their bounding boxes.
    // hierarchy1 stores indices into contours1, so contours1 itself must not be
    // reordered; sorting the extracted boxes is safe.
    vector<Rect> boxes;
    for(int i = 0; i >= 0 && i < static_cast<int>(contours1.size()); i = hierarchy1[i][0]) {
        boxes.push_back(boundingRect(contours1[i]));
    }
    std::sort(boxes.begin(), boxes.end(),
              [](const Rect &a, const Rect &b) { return a.x < b.x; });

    string result;
    for(const Rect &r : boxes) {
        rectangle(image, r, Scalar(0,255,0), 2);
        Mat ROI = thr1(r);
        Mat tmp1, tmp2;
        resize(ROI,tmp1, Size(20,30), 0,0,INTER_LINEAR );
        tmp1.convertTo(tmp2,CV_32FC1);
        Mat bestLabels;
        // The returned value is the predicted label for the single input row.
        float p = knn -> findNearest(tmp2.reshape(1,1),4, bestLabels);
        result = result + " " + char(int(p));
    }
    imshow("Im with bbox", image);
    waitKey();
    return  result;
}

用于训练的图像：

测试图片：

输出：

更新：为了查看实际检测到的内容，我对代码做了一些修改：

        cout << p << endl;
        if(int(p) < 32 || int(p) == 127)
            p = 45;
        result = result + char(int(p));

可以看到，几乎每个数字都对应一个不可显示的字符。我用“-”替换了这些字符，以便一眼看出哪些字符被识别出来、哪些没有。“B”被识别为“2”，“C”被识别为“1”，唯一正确的识别结果是“3”。

更新2

我发现了问题所在，具体是训练部分中的这一行：

c-=0x30; // subtracts 0x30 (ASCII '0') from the key code: only '0'-'9' become their digit value, any other key yields an unrelated number

现在检测可以正常工作，但是还有另一个问题：输出顺序不正确。我尝试对轮廓进行排序，但这样做会导致轮廓出错，这可能是由测试部分最后一个 for 循环中使用的层次结构（hierarchy）引起的：

    bool sorting (vector<Point> &a, vector<Point> &b) {
        Rect ra = boundingRect(a);
        Rect rb = boundingRect(b);
        return (ra.x < rb.x);
    }
    [...]
    // NOTE(review): the test loop advances with i = hierarchy1[i][0], i.e. the
    // hierarchy holds indices into contours1. Reordering contours1 here leaves
    // those indices pointing at different contours than before the sort.
    sort(contours1.begin(), contours1.end(), sorting);

那是我程序的当前行为。

不进行排序：正确的轮廓，正确的检测，错误的输出顺序

进行排序：轮廓错误（检测错误），顺序正确

OpenCV-C ++：使用KNN构建OCR

0 个答案: