我正在尝试在 OpenCV 中使用 KNN 构建 OCR。我做了一些尝试，但结果与预期不符，我认为检测部分存在错误。第一个问题是无法识别字母：即使检测到了字符，也总是被识别为数字。此外，有时连数字也无法识别，这种情况下它们往往会被直接跳过。下面是我目前的代码。
训练：
void trainText() {
Mat thr, gray, con;
Mat src = imread("training_chars.png");
cvtColor(src, gray, CV_BGR2GRAY);
threshold(gray,thr,125,255,THRESH_BINARY_INV);
imshow("text", thr);
waitKey();
thr.copyTo(con);
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
Mat sample;
Mat response_array;
findContours( con, contours, hierarchy,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE );
for(int i = 0; i<contours.size(); i=hierarchy[i][0]) {
Rect r= boundingRect(contours[i]); //Find bounding rect for each contour
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,0,255),2,8,0);
Mat ROI = thr(r); //Crop the image
Mat tmp1, tmp2;
resize(ROI,tmp1, Size(20,30), 0,0,INTER_LINEAR );
tmp1.convertTo(tmp2,CV_32FC1); //convert to float
imshow("src",src);
int c=waitKey(0); // Read corresponding label for contour from keyoard
c-=0x30; // Convert ascii to integer value
response_array.push_back(c); // Store label to a mat
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,255,0),2,8,0);
sample.push_back(tmp2.reshape(1,1)); // Store sample data
}
Mat response,tmp;
tmp=response_array.reshape(1,1); //make continuous
tmp.convertTo(response,CV_32FC1); // Convert to float
FileStorage Data("TrainingData.xml",FileStorage::WRITE); // Store the sample data in a file
Data << "data" << sample;
Data.release();
FileStorage Label("LabelData.xml",FileStorage::WRITE); // Store the label data in a file
Label << "label" << response;
Label.release();
cout<<"Training and Label data created successfully....!! "<<endl;
imshow("src",src);
waitKey(0);
}
测试:
/**
 * Recognises the characters in `image` with a KNN classifier trained from
 * "TrainingData.xml" / "LabelData.xml".
 *
 * The image is binarised the same way as during training, the bounding box
 * of every top-level contour is collected, and the boxes are ordered by
 * their left edge so the characters are emitted left to right (this assumes
 * a single line of text). Each crop is resized to 20x30, flattened, and
 * classified with the 4 nearest neighbours; the predicted label is
 * interpreted as a character code.
 *
 * @param image BGR input image. The detected bounding boxes are drawn onto it.
 * @return The recognised characters, each preceded by a single space; an
 *         empty string if the training data could not be loaded.
 */
string getText (Mat image) {
    Mat gray1, thr1, con1;
    Mat src1 = image.clone();
    cvtColor(src1, gray1, CV_BGR2GRAY);
    threshold(gray1, thr1, 125, 255, THRESH_BINARY_INV);
    thr1.copyTo(con1);

    Mat sample1;
    FileStorage Data1("TrainingData.xml", FileStorage::READ);
    Data1["data"] >> sample1;
    Data1.release();

    Mat response1;
    FileStorage Label1("LabelData.xml", FileStorage::READ);
    Label1["label"] >> response1;
    Label1.release();

    if (sample1.empty() || response1.empty()) {
        cout << "Training data could not be loaded" << endl;
        return string();
    }

    Ptr<ml::KNearest> knn(ml::KNearest::create());
    knn->train(sample1, ml::ROW_SAMPLE, response1);
    cout<< "Training compleated.....!!" <<endl;

    vector<vector<Point>> contours1;
    vector<Vec4i> hierarchy1;
    findContours(con1, contours1, hierarchy1, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);

    // Walk the top-level contours through the hierarchy links first and keep
    // only their bounding boxes. Sorting the boxes (rather than contours1
    // itself) leaves the indices stored in hierarchy1 valid during the walk.
    // hierarchy1[i][0] is -1 after the last contour, hence the `i >= 0` test.
    vector<Rect> boxes;
    for (int i = 0; i >= 0 && i < static_cast<int>(contours1.size()); i = hierarchy1[i][0]) {
        boxes.push_back(boundingRect(contours1[i]));
    }
    sort(boxes.begin(), boxes.end(),
         [](const Rect &a, const Rect &b) { return a.x < b.x; });

    string result;
    for (const Rect &r : boxes) {
        rectangle(image, r, Scalar(0, 255, 0), 2);

        Mat resized, feature;
        resize(thr1(r), resized, Size(20, 30), 0, 0, INTER_LINEAR);
        resized.convertTo(feature, CV_32FC1);

        Mat bestLabels;
        float p = knn->findNearest(feature.reshape(1, 1), 4, bestLabels);
        result = result + " " + char(int(p));
    }

    imshow("Im with bbox", image);
    waitKey();
    return result;
}
cout << p << endl;
if(int(p) < 32 || int(p) == 127)
p = 45;
result = result + char(int(p));
可以看到，几乎每个数字都对应一个不可显示的字符。我用“-”替换了这些字符，只是为了能一眼看出哪些字符被识别出来、哪些没有。“B”被检测为“2”，“C”被检测为“1”，唯一正确的检测结果是“3”。
更新2
我找到了问题所在，它就在训练部分的这一行：
c-=0x30;
现在检测可以正常工作了，但又出现了另一个问题：识别结果的顺序不正确。我尝试对轮廓进行排序，但这样做会导致选取到错误的轮廓，原因可能是测试部分最后一个 for 循环是依据层次结构（hierarchy）来遍历轮廓的：
bool sorting (vector<Point> &a, vector<Point> &b) {
Rect ra = boundingRect(a);
Rect rb = boundingRect(b);
return (ra.x < rb.x);
}
[...]
// Orders the contours left to right by bounding-box x (see sorting()).
// NOTE(review): this reorders contours1 only. hierarchy1 still stores the
// pre-sort indices, so a loop that advances with hierarchy1[i][0] will no
// longer visit the intended contours after this call.
sort(contours1.begin(), contours1.end(), sorting);
这就是我的程序目前的行为。