我想要做的只是在一个GTX1060显卡中运行5个线程,但是失败了: [运行环境]: Linux Ubuntu 16.04 + CUDA 8.0 + OpenCV3.1 + GTX1060
[错误]:
OpenCV错误:Gpu API调用(遇到非法内存访问),位于linesAccum_gpu,文件 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu,第143行 OpenCV错误:Gpu API调用(遇到非法内存访问),位于linesAccum_gpu,文件 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu,第143行 terminate called recursively(terminate被递归调用)
[代码]:
#include <cmath>
#include <iostream>
#include "opencv2/core.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "pthread.h"
using namespace std;
using namespace cv;
using namespace cv::cuda;
#define PI 3.1415926
#define NUM_LOOP 500
void carReviseInterface_gpu(Mat src, Mat &dst)
{
// Canny
Mat contours;
cuda::GpuMat d_src(src);
//CannyDetect(src, contours);
cuda::GpuMat gray_src;
cv::cuda::cvtColor(d_src, gray_src, COLOR_BGR2GRAY);
cuda::GpuMat d_contours;
Ptr<cuda::CannyEdgeDetector> cuCanny = cuda::createCannyEdgeDetector(74, 147);
cuCanny->detect(gray_src, d_contours);///
//d_contours.download(contours);
//imshow("1 gpu canny", contours);
//
//std::vector<cv::Vec4i> lines;
//HoughLineDetect(src, lines);
///////////////////
//GpuMat d_src(contours);
GpuMat d_lines;
{
const int64 start = getTickCount();
Ptr<cuda::HoughSegmentDetector> hough = cuda::createHoughSegmentDetector(1.0f, (float)(CV_PI / 180.0f), 129, 20);
hough->detect(d_contours, d_lines); // d_src
const double timeSec = (getTickCount() - start) / getTickFrequency();
//cout << "GPU Time : " << timeSec * 1000 << " ms" << endl;
//cout << "GPU Found : " << d_lines.cols << endl;
}
vector<Vec4i> lines_gpu;
if (!d_lines.empty())
{
lines_gpu.resize(d_lines.cols);
Mat h_lines(1, d_lines.cols, CV_32SC4, &lines_gpu[0]);
d_lines.download(h_lines);
}
//for (size_t i = 0; i < lines_gpu.size(); ++i)
//{
// Vec4i l = lines_gpu[i];
// line(src, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, LINE_AA);
//}
//imshow("2 gpu hough ", src);
}
void *threadFun(void *arg)
{
Mat src_gpu = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
Mat res, res_gpu;
for (int i = 0; i < NUM_LOOP; ++i) {
carReviseInterface_gpu(src_gpu, res_gpu);
}
}
int main()
{
pthread_t pth[5];
memset(&pth, 0, sizeof(pth));
Mat src = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
Mat res, res_gpu;
Mat src_gpu = src.clone();
imshow("0 src", src);
cv::Size size;
size.width = 320;
size.height = 240;
const int64 start2 = getTickCount();
for (int i = 0; i < 2; ++i)
{
pthread_create(&pth[i], NULL, threadFun, NULL);
}
// for (int i = 0; i < NUM_LOOP; ++i)
// {
// carReviseInterface_gpu(src_gpu, res_gpu);
// //cuResize(src_gpu, res_gpu, size);
// }
const double timeSec2 = (getTickCount() - start2) / getTickFrequency();
cout << "GPU Time : " << (timeSec2 * 1000) / NUM_LOOP << " ms" << endl;
imshow("8 cpu src", src);
// imshow("9 cpu res", res);
imshow("8 gpu src", src_gpu);
// imshow("9 gpu res", res_gpu);
waitKey(0);
return 0;
}
我该怎么办? 在此先感谢您的帮助〜
答案 0 :(得分:0)
OpenCV中多线程的关键是仅使用带有stream参数的GPU函数。
// Create an explicit stream object to pass to the GPU calls, as the answer suggests.
cv::cuda::Stream myStream;
// Use the overload that takes the stream rather than the stream-less one.
// NOTE(review): the stream overload is presumably asynchronous, so the caller
// likely has to wait on myStream before reading cpuImage — confirm in the
// OpenCV documentation. gpuImage/cpuImage are not declared in this snippet.
gpuImage.download(cpuImage, myStream);
从多个线程访问时,不带流的OpenCV cuda函数可能存在缓冲区覆盖问题。