I implemented a neural network using the OpenCV ANN library. I am new to this field and have learned most of what I know online (mainly from Stack Overflow).
I am using this ANN to detect license plates. I did the segmentation part with the OpenCV image-processing functions, and it works well: it segments the characters and feeds them to the NN part of the project. The NN should then recognize the license plate number.
I have 20x30 sample images, so I have 600 neurons in the input layer. Since there are 36 possible characters (0-9, A-Z), I have 36 output neurons, and I kept 100 neurons in the hidden layer. OpenCV's predict function gives the same output for every segmented image, and that output contains some large negative numbers (< -1). I am using cv::ml::ANN_MLP::SIGMOID_SYM as the activation function. Please excuse the many commented-out lines of code (I was experimenting by trial and error). I need to figure out what the output of the predict function means. Thanks for your help.
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

using namespace cv;
using namespace std;

// Globals shared between main() and gen(); their declarations were missing from the posted snippet.
// 20x30 matches the 600 input neurons described above; the MIN_CONTOUR_AREA value is an assumption.
const int RESIZED_IMAGE_WIDTH = 20;
const int RESIZED_IMAGE_HEIGHT = 30;
const int MIN_CONTOUR_AREA = 100;
cv::Mat matTrainingImagesAsFlattenedFloats;
cv::Mat matClassificationInts;
cv::Mat matSamples;
void gen();

int main() {
// network topology: 600 inputs (20x30 pixels), 100 hidden neurons, 36 outputs (0-9, A-Z)
Mat layers = Mat(3, 1, CV_32S);
layers.row(0) = Scalar(600);
layers.row(1) = Scalar(100);
layers.row(2) = Scalar(36);
Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
nnPtr->setLayerSizes(layers);
nnPtr->setTrainMethod(ml::ANN_MLP::BACKPROP);
nnPtr->setTermCriteria(TermCriteria(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 1000, 0.00001f));
nnPtr->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM, 1, 1);
nnPtr->setBackpropWeightScale(0.5f);
nnPtr->setBackpropMomentumScale(0.5f);
cv::imshow("chaos", matTrainingImagesAsFlattenedFloats);
// cout <<abc << endl;
matTrainingImagesAsFlattenedFloats.convertTo(matTrainingImagesAsFlattenedFloats, CV_32F);
//matClassificationInts.reshape(1, 496);
matClassificationInts.convertTo(matClassificationInts, CV_32F);
matSamples.convertTo(matSamples, CV_32F);
std::cout << matClassificationInts.rows << " " << matClassificationInts.cols << " ";
std::cout << matTrainingImagesAsFlattenedFloats.rows << " " << matTrainingImagesAsFlattenedFloats.cols << " ";
std::cout << matSamples.rows << " " << matSamples.cols;
imshow("Samples", matSamples);
imshow("chaos", matTrainingImagesAsFlattenedFloats);
Ptr<ml::TrainData> trainData = ml::TrainData::create(matTrainingImagesAsFlattenedFloats, ml::SampleTypes::ROW_SAMPLE, matSamples);
nnPtr->train(trainData);
bool m = nnPtr->isTrained();
if (m)
std::cout << "training complete\n\n";
//predicting
std::vector <cv::String> filename;
cv::String folder = "./plate/";
cv::glob(folder, filename);
if (filename.empty()) { // no image files found in the folder
std::cout << "error: no images read from ./plate/\n\n"; // show error message on command line
return(0); // and exit program
}
for (int i = 0; i < filename.size(); i++) {
cv::Mat matTestingNumbers = cv::imread(filename[i]);
cv::Mat matGrayscale;
cv::Mat matBlurred;
cv::Mat matThresh;
cv::Mat matThreshCopy;
cv::Mat matCanny;
cv::cvtColor(matTestingNumbers, matGrayscale, cv::COLOR_BGR2GRAY); // convert to grayscale
// manual inverse binary threshold: dark pixels (<= 130) become white foreground
matThresh = cv::Mat(cv::Size(matGrayscale.cols, matGrayscale.rows), CV_8UC1);
for (int x = 0; x < matGrayscale.cols; x++) {
for (int y = 0; y < matGrayscale.rows; y++) {
matThresh.at<uchar>(y, x) = (matGrayscale.at<uchar>(y, x) <= 130) ? 255 : 0;
}
}
// blur
cv::GaussianBlur(matThresh, // input image
matBlurred, // output image
cv::Size(5, 5), // smoothing window width and height in pixels
0); // sigma value, determines how much the image will be blurred, zero makes function choose the sigma value
matThreshCopy = matThresh.clone(); // keep a copy, since findContours would modify its input
std::vector<std::vector<cv::Point> > ptContours; // contours
std::vector<cv::Vec4i> v4iHierarchy; // contour hierarchy
cv::Canny(matBlurred, matCanny, 20, 40, 3);
cv::Mat matROIFloat;
cv::resize(matThresh, matThresh, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT));
matThresh.convertTo(matROIFloat, CV_32FC1, 1.0 / 255.0); // scale pixels to [0, 1] floats, as the network expects
cv::Mat matROIFlattenedFloat = matROIFloat.reshape(1, 1); // flatten to a single 1x600 row
std::vector<float> output2;
nnPtr->predict(matROIFlattenedFloat, output2); // fills output2 with the 36 output-neuron activations
imshow("predicted input", matROIFlattenedFloat);
int fo = 0; // index of the strongest output neuron
float m = output2[0]; // its activation value
for (int j = 1; j < 36; j++) {
if (output2[j] > m) {
m = output2[j];
fo = j;
}
}
cout << "j value in output " << fo << " Max value " << m << endl; // print the value itself, not a pointer to it
}
waitKey(0);
return(0);
}
void gen() {
bool f = true;
cv::Mat imgTrainingNumbers;
cv::Mat imgGrayscale;
cv::Mat imgBlurred;
cv::Mat imgThresh;
cv::Mat imgThreshCopy;
cv::Mat matROIResized = cv::Mat(cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT), CV_8UC1);
cv::Mat matROI;
std::vector<cv::String> filename;
std::vector<std::vector<cv::Point> > ptContours;
std::vector<cv::Vec4i> v4iHierarchy;
int count = 0, contoursCount = 0;
// zero-initialize, so every element not explicitly written (the 0s of the one-hot rows) is defined
matSamples = cv::Mat::zeros(cv::Size(36, 496), CV_32FC1);
matTrainingImagesAsFlattenedFloats = cv::Mat::zeros(cv::Size(600, 496), CV_32FC1);
for (int j = 0; j <= 35; j++) {
int tmp = j;
cv::String folder = "./Training Data/" + std::to_string(tmp);
cv::glob(folder, filename);
for (int k = 0; k < filename.size(); k++) {
count++;
imgTrainingNumbers = cv::imread(filename[k]); // read one training image
if (imgTrainingNumbers.empty()) {
std::cout << "error: image not read from file\n\n";
continue; // skip unreadable files rather than passing an empty Mat to cvtColor
}
cv::cvtColor(imgTrainingNumbers, imgGrayscale, cv::COLOR_BGR2GRAY);
imgThresh = cv::Mat(cv::Size(imgGrayscale.cols, imgGrayscale.rows), CV_8UC1);
// manual inverse binary threshold, same as in main(): dark pixels become white foreground
for (int x = 0; x < imgGrayscale.cols; x++) {
for (int y = 0; y < imgGrayscale.rows; y++) {
imgThresh.at<uchar>(y, x) = (imgGrayscale.at<uchar>(y, x) <= 130) ? 255 : 0;
}
}
// cv::imshow("imgThresh"+std::to_string(count), imgThresh);
imgThreshCopy = imgThresh.clone();
cv::GaussianBlur(imgThreshCopy,
imgBlurred,
cv::Size(5, 5),
0);
cv::Mat imgCanny;
// cv::Canny(imgBlurred,imgCanny,20,40,3);
cv::findContours(imgBlurred,
ptContours,
v4iHierarchy,
cv::RETR_EXTERNAL,
cv::CHAIN_APPROX_SIMPLE);
for (int i = 0; i < ptContours.size(); i++) {
if (cv::contourArea(ptContours[i]) > MIN_CONTOUR_AREA) {
contoursCount++;
cv::Rect boundingRect = cv::boundingRect(ptContours[i]);
cv::rectangle(imgTrainingNumbers, boundingRect, cv::Scalar(0, 0, 255), 2); // draw a red rectangle around each accepted contour
matROI = imgThreshCopy(boundingRect); // get ROI image of bounding rect
std::string path = "./" + std::to_string(contoursCount) + ".JPG";
cv::imwrite(path, matROI);
// cv::imshow("matROI" + std::to_string(count), matROI);
cv::resize(matROI, matROIResized, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)); // resize image, this will be more consistent for recognition and storage
std::cout << filename[k] << " " << contoursCount << "\n";
//cv::imshow("matROI", matROI);
//cv::imshow("matROIResized"+std::to_string(count), matROIResized);
// cv::imshow("imgTrainingNumbers" + std::to_string(contoursCount), imgTrainingNumbers);
// map the folder index to an ASCII label: 0-9 -> '0'-'9' (48-57), 10-35 -> 'A'-'Z' (65-90)
int intChar;
if (j < 10)
intChar = j + 48;
else {
intChar = j + 55;
}
cv::Mat matImageFloat;
matROIResized.convertTo(matImageFloat, CV_32FC1); // no 1/255 scaling here; this turns out to be the bug (see Answer 1)
cv::Mat matImageFlattenedFloat = matImageFloat.reshape(1, 1); // flatten to one row
try {
// copy the flattened pixels into row (contoursCount - 1) of the training matrix
int ii = 0;
for (int r = 0; r < matImageFloat.rows; r++) {
for (int c = 0; c < matImageFloat.cols; c++) {
matTrainingImagesAsFlattenedFloats.at<float>(contoursCount - 1, ii++) = matImageFloat.at<float>(r, c);
}
}
}
catch (std::exception &exc) {
f = false;
std::cout << exc.what() << "\n"; // report the error instead of discarding it
}
if (f) {
matClassificationInts.push_back((float)intChar); // ASCII label for this sample
matSamples.at<float>(contoursCount - 1, j) = 1.0; // one-hot response row
}
f = true;
} // end if (contour area)
} // end for (contours)
} // end for (k, files)
} // end for (j, classes)
}
Answer 0 (score: 1)
Unfortunately, I don't have enough time to go through the code carefully, but I can say this: to train a model that performs well at predicting 36 classes, you need to get several things right.
In any case, the most important thing is to make sure you have a reliable mechanism for validating the model's accuracy after training. If you have not already done so, set aside some images as a separate test set, and after each experiment use the trained ANN to predict every test image to check the accuracy, as in the sketch below.
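For illustration, here is a minimal sketch of such a hold-out evaluation. The names matTestImages and testLabels are hypothetical; it assumes each test image is already flattened to a 1x600 CV_32F row scaled to [0, 1], matching the training format, with testLabels holding the true class index (0-35) for each row:

// Hypothetical hold-out evaluation for a trained cv::ml::ANN_MLP.
int countCorrect = 0;
for (int r = 0; r < matTestImages.rows; r++) {
cv::Mat outputs;
nnPtr->predict(matTestImages.row(r), outputs); // outputs: 1x36 row of output-neuron activations
cv::Point maxLoc;
cv::minMaxLoc(outputs, nullptr, nullptr, nullptr, &maxLoc); // argmax over the 36 outputs
if (maxLoc.x == testLabels[r]) countCorrect++;
}
std::cout << "test accuracy: " << (float)countCorrect / matTestImages.rows << std::endl;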
One final, general note: what you are attempting is complex. You will spare yourself a great deal of trouble if you invest time up front and refactor your code often. No matter how many experiments you run, if some flaw causes (for example) your training data to differ fundamentally from your test data, you will never see good results.
Good luck!
Edit: I should also point out that seeing the same output for every input image is a classic sign of a failed training run. Unfortunately, there are many reasons this can happen, and without cleaner code and access to your image data it would be very hard for anyone to isolate the cause for you.
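One quick diagnostic along those lines, sketched here using the variable names from the question: before calling train(), check the value range of the input matrix and that every response row is one-hot, since out-of-range inputs (0-255 instead of 0-1) and partly uninitialized response rows are both common causes of this symptom:

// Sanity-check the training matrices before training (assumes the question's variables).
double minVal, maxVal;
cv::minMaxLoc(matTrainingImagesAsFlattenedFloats, &minVal, &maxVal);
std::cout << "input pixel range: [" << minVal << ", " << maxVal << "]\n"; // expect roughly [0, 1]
for (int r = 0; r < matSamples.rows; r++) {
double rowSum = cv::sum(matSamples.row(r))[0]; // each one-hot response row should sum to exactly 1
if (rowSum != 1.0)
std::cout << "row " << r << " is not one-hot (sum = " << rowSum << ")\n";
}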
Answer 1 (score: 1)
I have solved the problem of not getting the right prediction output. The problem arose because the input Mat images used for training (i.e., matTrainingImagesAsFlattenedFloats) had white pixel values of 255.0. This happened because I was not using convertTo() properly: you need convertTo(outputImage, CV_32FC1, 1.0 / 255.0) to scale all pixel values from 255.0 down to 1.0. Once I did that, I got the correct output.
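A minimal sketch of the corrected conversion (the matROIResized name follows the question's code; the key point is the 1.0 / 255.0 scale factor passed to convertTo):

// Scale 8-bit pixels (0-255) into [0, 1] floats before feeding them to the network.
cv::Mat matImageFloat;
matROIResized.convertTo(matImageFloat, CV_32FC1, 1.0 / 255.0); // 255 -> 1.0, 0 -> 0.0
cv::Mat matImageFlattenedFloat = matImageFloat.reshape(1, 1); // one 1x600 row per sample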
Thanks for the help.
Answer 2 (score: 0)