OpenCV 3.0 - SVM + HOG 半身检测总是给出相同的错误结果

时间:2016-11-28 12:49:41

标签: c++ opencv svm detection

我正在开发一个半身检测器,以提高普通行人检测器的性能。我知道处理遮挡还有其他方法,但这是我的毕业设计要求我做的内容。我的问题是检测效果不好,更重要的是,检测结果呈现出一种固定的模式:四个表示检测结果的矩形几乎总是出现在相同的位置,而且框住的根本不是半身。

我有一组图像:414 张由我自己裁剪的上半身图像用作正样本,另有 8520 张负样本图像,所有图像的尺寸都是 64x64。我按如下方式提取 HOG 描述符:

// Compute one HOG descriptor per positive sample image and store each one in
// Pos_Descriptors.yml under the key "Descriptores<N>".
int i;
string imgname, index;

// 64x64 detection window, 16x16 blocks, 8x8 block stride, 8x8 cells, 9 bins.
HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);

vector<float> pos_rec_descript;
vector<Point> locations;
// Number of floats produced for a single detection window.
size_t SizeDesc;
SizeDesc = hog.getDescriptorSize();


FileStorage fpd ("Pos_Descriptors.yml", FileStorage::WRITE);

for (i = 1; i < 415; i++) { // img1.png .. img414.png in ./pos_rec3
    stringstream a;
    a << i;
    imgname = "./pos_rec3/img" + a.str();
    imgname += ".png";

    Mat img = imread(imgname, CV_LOAD_IMAGE_COLOR);
    // Stop immediately on a missing/unreadable image instead of silently
    // storing a bogus descriptor for it.
    CV_Assert(!img.empty());

    hog.compute(img, pos_rec_descript, Size (16,16), Size (0,0),locations);
    // The training stage expects exactly one window-sized descriptor per
    // sample; an image larger than the window would yield several windows
    // and misalign every following row of the training matrix.
    CV_Assert(pos_rec_descript.size() == SizeDesc);

    fpd << "Descriptores" + a.str() << pos_rec_descript;

}
fpd.release();

我对负样本做了同样的处理。

然后,我按如下方式训练了SVM。

// Number of positive (upper-body) training samples.
#define POS 414
// Number of negative training samples.
#define NEG 8520
// Total number of training samples. Derived from POS and NEG so the array
// sizes below cannot drift out of sync when either count changes.
#define TOTAL (POS + NEG)
// Number of floats in one HOG descriptor (one row of the training matrix).
#define DESCRIPT 1764

// Training matrix: one row per sample, one column per descriptor element.
float trainingData[TOTAL][DESCRIPT];
// Class label of each row of trainingData.
int labels[TOTAL];

fstream doc;


// Fill the global `labels` array: entries with index below POS get the
// label 1, every remaining entry up to TOTAL gets the label -1.
void set_labels(){
    for (int idx = 0; idx < TOTAL; ++idx)
        labels[idx] = (idx < POS) ? 1 : -1;
}

int main(int, char**)
{

FileStorage fsv ("supvec.yml", FileStorage::WRITE);
FileStorage ftd ("TrainData.yml", FileStorage::WRITE);
//FileStorage flm ("Labels.yml", FileStorage::WRITE);
FileStorage fpd ("../HOG_descriptors/Pos_Descriptors.yml", FileStorage::READ);
FileStorage fnd ("../HOG_descriptors_neg/Neg_Descriptors.yml", FileStorage::READ);

set_labels();

    // Set up training data
    vector <float> pos_D, neg_D, train_D ;
int k = 0;

for (int i = 1; i < POS+1; i++) {   
    stringstream a;
    a << i;
    fpd["Descriptores" + a.str()] >> pos_D;
    for (int j = 0; j < pos_D.size() ; j++){
    train_D.push_back(pos_D[j]);
    }
}
fpd.release();

for (int i = 1; i < NEG+1; i++) {   
    stringstream a;
    a << i;
    fnd["Descriptores" + a.str()] >> neg_D;
    for (int j = 0; j < neg_D.size() ; j++){
    train_D.push_back(neg_D[j]);
    }
}
fnd.release();

for (int i = 0; i < TOTAL; i++){
    for (int j = 0; j < DESCRIPT; j++){
    trainingData[i][j] = train_D[k]; 
    k++;
    }

}

Mat trainingDataMat(TOTAL, DESCRIPT, CV_32FC1, trainingData);
//memcpy(trainingDataMat.data, train_D.data(), train_D.size()*sizeof(float));
    Mat labelsMat(TOTAL, 1, CV_32SC1, labels);

//ftd << "trainingDataMat" << trainingDataMat;
//flm << "labelsMat" << labelsMat;  

// Train the SVM

Ptr<SVM> svm = SVM::create();
    svm->setType(SVM::C_SVC);
    svm->setKernel(SVM::LINEAR);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));

/*Ptr<TrainData> autoTrainData = TrainData::create(trainingDataMat, ROW_SAMPLE, labelsMat);

ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C);
ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA);
ParamGrid pGrid = SVM::getDefaultGrid(SVM::P);
pGrid.logStep = 1;   
ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU);
nuGrid.logStep = 1;
ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF);
coeffGrid.logStep = 1;
ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE);
degreeGrid.logStep = 1; */

cout << "Está entrenando..." << endl;   

//svm->trainAuto(autoTrainData, 10, Cgrid, gammaGrid, pGrid, nuGrid, coeffGrid, degreeGrid, false);

    svm->train(trainingDataMat, ROW_SAMPLE, labelsMat);

svm->save("SVM3_WS16_P0_LINEAR.yml");

我尝试过 LINEAR 和 RBF 两种核函数(所以代码中保留了被注释掉的自动调参部分,我曾在不同的 SVM 配置之间来回切换),但似乎都不起作用。实际上,它们给出的结果几乎相同,这让我怀疑是训练阶段或检测阶段(代码见下)出了问题,导致整个项目无法正常工作。

下面是我为 HOG 检测器加载 SVM 并尝试扫描整幅图像的代码:

using namespace cv;
using namespace std;
using namespace cv::ml;



// static void help()
// {
//     printf(
//             "\nDemonstrate the use of the HoG descriptor using\n"
//             "  HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
//             "Usage:\n"
//             "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }

void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );

void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
{
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);

CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
         (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();

hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;
}


/**
 * Run the trained HOG + SVM detector on one image or on a list of images.
 *
 * argv[1] is either an image file or a text file with one image path per line
 * (lines starting with '#' are skipped). Detections are drawn on the image and
 * shown in the "people detector" window; pressing 'q' or 'Q' stops a list run.
 *
 * @return 0 on normal exit or when no argument is given, -1 if the input or
 *         the SVM model cannot be loaded.
 */
int main(int argc, char** argv)
{
    Mat img;
    FILE* f = 0;
    char _filename[1024];

    if( argc == 1 )
    {
        printf("Usage: peopledetect (People_imgs | People_imgs.txt)\n");
        return 0;
    }
    img = imread(argv[1]);

    if( img.data )
    {
        // Bounded copy: argv[1] may be longer than the buffer.
        strncpy(_filename, argv[1], sizeof(_filename) - 1);
        _filename[sizeof(_filename) - 1] = '\0';
    }
    else
    {
        // Not an image: treat the argument as a list of image paths.
        f = fopen(argv[1], "rt");
        if(!f)
        {
            fprintf( stderr, "ERROR: the specified file could not be loaded\n");
            return -1;
        }
    }

    // Load SVM
    Ptr<SVM> svm = cv::Algorithm::load<ml::SVM>("../SVM_Train/SVM3_WS16_P0_LINEAR.yml");
    if( svm.empty() )
    {
        // Fail with a clear message instead of dereferencing an empty model.
        fprintf( stderr, "ERROR: the trained SVM model could not be loaded\n");
        if(f)
            fclose(f);
        return -1;
    }

    // Must use the same HOG layout as the one the descriptors were trained with.
    HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);

    vector <float> hog_detector;
    get_svm_detector (svm, hog_detector);

    hog.setSVMDetector(hog_detector);
    namedWindow("people detector", 1);

    for(;;)
    {
        char* filename = _filename;
        if(f)
        {
            if(!fgets(filename, (int)sizeof(_filename)-2, f))
                break;
            //while(*filename && isspace(*filename))
            //  ++filename;
            if(filename[0] == '#')
                continue;
            // Strip trailing whitespace, including the newline kept by fgets.
            // The cast keeps isspace() defined for bytes outside 0..127.
            int l = (int)strlen(filename);
            while(l > 0 && isspace((unsigned char)filename[l-1]))
                --l;
            filename[l] = '\0';
            img = imread(filename);
        }
        printf("%s:\n", filename);
        if(!img.data)
            continue;

        fflush(stdout);
        vector<Rect> found, found_filtered;
        vector<double> found_weights;
        double t = (double)getTickCount();
        // run the detector with default parameters. to get a higher hit-rate
        // (and more false alarms, respectively), decrease the hitThreshold and
        // groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
        hog.detectMultiScale(img, found, found_weights, 0, Size(16,16), Size(0,0), 1.01, 2);
        //hog.detect(img, found, 0, Size(16,16), Size(0,0), searchLocations);
        t = (double)getTickCount() - t;
        printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());

        // Drop every rectangle that lies completely inside another one.
        size_t i, j;
        for( i = 0; i < found.size(); i++ )
        {
            Rect r = found[i];
            for( j = 0; j < found.size(); j++ )
                if( j != i && (r & found[j]) == r)
                    break;
            if( j == found.size() )
                found_filtered.push_back(r);
        }
        for( i = 0; i < found_filtered.size(); i++ )
        {
            Rect r = found_filtered[i];
            // the HOG detector returns slightly larger rectangles than the real objects.
            // so we slightly shrink the rectangles to get a nicer output.
            r.x += cvRound(r.width*0.1);
            r.width = cvRound(r.width*0.7);
            r.y += cvRound(r.height*0.07);
            r.height = cvRound(r.height*0.7);
            rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 2);
            // Shown after each rectangle so the detections can be stepped
            // through one key press at a time.
            imshow("people detector", img);
            waitKey(0);
        }
        // Also show the image when nothing was detected; otherwise the window
        // would stay empty while waiting for a key below.
        imshow("people detector", img);
        //string imgname = "./Responses/Win_Stride16_4.png";
        //imwrite(imgname, img);
        int c = waitKey(0) & 255;
        if( c == 'q' || c == 'Q' || !f)
            break;
    }
    if(f)
        fclose(f);
    return 0;
}

我已经检查了描述符的所有维度,每个Mat似乎都没问题。但是当我使用detectMultiScale时,它会显示如下内容:

Image 1: It's strange because it is missing lots of detections

Image 2: Here I realized there was a kind of pattern with these 4 rects

我的问题是:无论我改变什么(所用的描述符、核函数,以及 detectMultiScale 中的 winStride 和 padding),得到的结果总是非常相似,而且没有任何迹象表明其中有正确的检测。

我不太确定我向 HOG 提供支持向量的方式是否正确,但这是我找到的唯一方法(来自 StackOverflow 上的一个帖子)。

如果有人知道这里出了什么问题,以及为什么检测结果在不同配置之间没有变化,我将非常感激。这段代码已经让我头疼了好几周。我一直在修改函数的参数、修改 HOG 设置、更换核函数、尝试不同的图像集,但似乎没有任何改动能影响最终结果。

0 个答案:

没有答案