Question

我想要做的只是在一个GTX1060显卡中运行5个线程，但是失败了： [运行环境]： Linux Ubuntu 16.04 + CUDA 8.0 + OpenCV3.1 + GTX1060

[错误]：

OpenCV错误：Gpu API调用（遇到非法内存访问）在linesAccum_gpu中，文件 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu，第143行OpenCV错误：Gpu API调用（非法内存访问是遇到）在linesAccum_gpu文件中 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu，第143行终止被称为递归

[编号]：

#include <cmath>
#include <iostream>

#include "opencv2/core.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"

#include "pthread.h"

using namespace std;
using namespace cv;
using namespace cv::cuda;

#define PI 3.1415926
#define NUM_LOOP 500

void carReviseInterface_gpu(Mat src, Mat &dst)
{
        // Canny
        Mat contours;
        cuda::GpuMat d_src(src);
        //CannyDetect(src, contours);
        cuda::GpuMat gray_src;
        cv::cuda::cvtColor(d_src, gray_src, COLOR_BGR2GRAY);

        cuda::GpuMat d_contours;

        Ptr<cuda::CannyEdgeDetector> cuCanny = cuda::createCannyEdgeDetector(74, 147);
        cuCanny->detect(gray_src, d_contours);///

        //d_contours.download(contours);

        //imshow("1 gpu canny", contours);
        // 
        //std::vector<cv::Vec4i> lines;
        //HoughLineDetect(src, lines);

        ///////////////////
        //GpuMat d_src(contours);
        GpuMat d_lines;
        {
                const int64 start = getTickCount();

                Ptr<cuda::HoughSegmentDetector> hough = cuda::createHoughSegmentDetector(1.0f, (float)(CV_PI / 180.0f), 129, 20);

                hough->detect(d_contours, d_lines);        // d_src

                const double timeSec = (getTickCount() - start) / getTickFrequency();
                //cout << "GPU Time : " << timeSec * 1000 << " ms" << endl;
                //cout << "GPU Found : " << d_lines.cols << endl;
        }
        vector<Vec4i> lines_gpu;
        if (!d_lines.empty())
        {
                lines_gpu.resize(d_lines.cols);
                Mat h_lines(1, d_lines.cols, CV_32SC4, &lines_gpu[0]);
                d_lines.download(h_lines);
        }

        //for (size_t i = 0; i < lines_gpu.size(); ++i)
        //{
        //        Vec4i l = lines_gpu[i];
        //        line(src, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, LINE_AA);
        //}

        //imshow("2 gpu hough ", src);


}


void *threadFun(void *arg)
{
    Mat src_gpu = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
    Mat res, res_gpu;
    for (int i = 0; i < NUM_LOOP; ++i) {
        carReviseInterface_gpu(src_gpu, res_gpu);
    }
}

int main()
{
    pthread_t pth[5];
    memset(&pth, 0, sizeof(pth));
        Mat src = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
        Mat res, res_gpu;
        Mat src_gpu = src.clone();
        imshow("0 src", src);

        cv::Size size;
        size.width = 320;
        size.height = 240;


        const int64 start2 = getTickCount();
    for (int i = 0; i < 2; ++i)
    {
        pthread_create(&pth[i], NULL, threadFun, NULL);
    }
//        for (int i = 0; i < NUM_LOOP; ++i)
//        {
//                carReviseInterface_gpu(src_gpu, res_gpu);
//                //cuResize(src_gpu, res_gpu, size);
//        }
        const double timeSec2 = (getTickCount() - start2) / getTickFrequency();
        cout << "GPU Time : " << (timeSec2 * 1000) / NUM_LOOP << " ms" << endl;


        imshow("8 cpu src", src);
//        imshow("9 cpu res", res);

        imshow("8 gpu src", src_gpu);
//        imshow("9 gpu res", res_gpu);


        waitKey(0);
        return 0;
}

我该怎么办？在此先感谢您的帮助〜

Answer 1

OpenCV中多线程的关键是仅使用带有stream参数的GPU函数。

cv::cuda::Stream myStream;
gpuImage.download(cpuImage, myStream);

从多个线程访问时，不带流的OpenCV cuda函数可能存在缓冲区覆盖问题。

非法内存访问：一个GPU的多线程（OpenCV3.1 + CUDA 8.0）（完整代码）

1 个答案: