了解HOG特征的布局

时间:2015-07-16 12:18:37

标签: c++ opencv computer-vision pattern-matching object-detection

我正在做一个涉及多类物体检测的项目。我的目标是检测以下物体。
1. 卡车
2. 汽车
3. 人

由于我有三种不同的对象,这意味着我将有三种不同的窗口大小,但每个块(block)的HOG特征保持不变。我修改了OpenCV的hog.cpp,并新增了两个函数,只计算块的HOG描述符。这是我的代码。

void cv::gpu::HOGDescriptor::getDescriptorsBlock(const GpuMat& img, Size win_stride, GpuMat& descriptors, FileStorage fs3, string fileName, double scale, int width, int height, size_t lev)
{
    CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);

    size_t block_hist_size = getBlockHistogramSize();
    computeBlockHistograms(img);
    Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);

    // Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
    // Size wins_per_img   = numPartsWithin(img.size(), win_size, win_stride);

    // copy block_hists from GPU to CPU/

    float dest_ptr[block_hist_size * blocks_per_img.area()];

    cudaMemcpy( &dest_ptr[0], block_hists.ptr<float>(), block_hist_size *blocks_per_img.area()*sizeof(CV_32F),        cudaMemcpyDeviceToHost); 

    std::cout<<"( "<<width<< " ," << height<< ")"<< std::endl;
    std::cout <<lev<< std::endl;

    // write to yml file

    int level = lev;

    fs3<<"Scale"<<scale;
    fs3 <<"Level"<<level;
    fs3<<"Width"<<width<<"Height"<<height;
    fs3 << "features" << "[";
    for (unsigned int i = 0; i < (block_hist_size * blocks_per_img.area()) ; i++ )
    {
     fs3  << dest_ptr[i];
    }
    fs3 << "]";
}

类似地,下面是用于获取多尺度块描述符的函数:

void cv::gpu::HOGDescriptor::getDescriptorsMultiScale(const GpuMat& img,
                                              Size win_stride, double scale0, unsigned int count)
{

    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);

    vector<double> level_scale;
    double scale = 1.;
    int levels = 0;

    for (levels = 0; levels < nlevels; levels++)
    {
        level_scale.push_back(scale);
        if (cvRound(img.cols/scale) < win_size.width ||
            cvRound(img.rows/scale) < win_size.height || scale0 <= 1)
            break;
        scale *= scale0;
    }
    levels = std::max(levels, 1);
    level_scale.resize(levels);
    image_scales.resize(levels);

    // open yml file with image ID

    FileStorage fs3;
    char fileName[20];
    GpuMat descriptors;
    sprintf (fileName, "%04d", count);
    fs3.open(fileName, FileStorage::WRITE);

    for (size_t i = 0; i < level_scale.size(); i++)
    {
        scale = level_scale[i];
        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
        GpuMat smaller_img;

        if (sz == img.size())
            smaller_img = img;
        else
        {
            image_scales[i].create(sz, img.type());
            switch (img.type())
            {
                case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break;
                case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break;
            }
            smaller_img = image_scales[i];
        }
        std::cout<<"scale "<<level_scale[i]<<std::endl;

        // calculate descriptors for blocks 
        getDescriptorsBlock( smaller_img, win_stride, descriptors, fs3, fileName, scale, smaller_img.cols, smaller_img.rows, i);

        // detect(smaller_img, locations, hit_threshold, win_stride, padding);
    }
    // close yml file
    fs3.release();

} 

我的问题是:我只想弄清楚块的HOG描述符的布局结构。有人可以分享一下自己的想法吗?

1 个答案:

答案 0 :(得分:0)

通常,人们使用图像金字塔来实现尺度不变性。如果你想做得更精细,可以看看这篇论文:"Object Detection with Discriminatively Trained Part-Based Models"(基于判别式训练的部件模型的目标检测)[1]。他们在不同尺度上使用HoG,非常成功。当然,如果你更想了解特征本身的结构,最初的HoG论文[2]可能会有帮助。

[1] http://vision.ics.uci.edu/papers/FelzenszwalbGMR_PAMI_2009/FelzenszwalbGMR_PAMI_2009.pdf

[2] http://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf