我使用OpenCV和Visual Studio 2012编译了这个简单的颜色跟踪图像处理程序。首先,我编译了只使用CPU的版本。程序:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
用我的摄像头运行时,这个版本的帧率是16 fps。然后我编译了使用OpenCL(GPU支持)的版本。程序:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/ocl/ocl.hpp>
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
ocl::oclMat alpha(imgThresholded);
ocl::erode(alpha,alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::erode(alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imgThresholded = Mat(alpha);
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
但现在我的帧率只有10 fps。请问有人能告诉我为什么会这样吗?我在某处读到GPU支持可以提高fps性能。我使用的显卡是AMD Radeon。
答案 0 :(得分:2)
GPU是为大规模吞吐量而设计的,但把数据从CPU内存搬到GPU内存需要花费不少时间。你不应该认为使用GPU就一定能提高fps,这完全取决于GPU的计算能力被利用得有多充分。
在你的情况下,你对每一帧所做的计算似乎很少。所以我猜你的系统大部分时间都花在把帧传到GPU、再把结果传回来上了。
答案 1 :(得分:1)
(正如maZZZu评论的那样)
你现在的各个步骤是串行执行的。可以加入流水线:在捕获当前帧的同时,让OpenCL计算上一帧。你还可以让更多的步骤相互重叠,如下面的时间线所示。
这样一来,FPS就只取决于最耗时的那一个步骤。例如,如果复制到GPU需要20ms,而其他步骤都更快,那么其他步骤的耗时都会被隐藏,程序将显示50 FPS(1000ms / 20ms)。
- Time 1: get video data 1
- (Time 2: get video data 2) and (copy data 1 to gpu)
- (Time 3: get video data 3) and (copy data 2 to gpu) and (compute data 1)
- (Time 4: get video data 4) and (copy data 3 to gpu) and (compute data 2) and ..
- (Time 5: get video data 5) and (copy data 4 to gpu) and (compute data 3) and ..
- (Time 6: get video data 6) and (copy data 5 to gpu) and (compute data 4) and ..
- (Time 7: get video data 7) and (copy data 6 to gpu) and (compute data 5) and ..
因此,如果复制到GPU占用45%的时间,取回结果也占用45%的时间,那么只要让其中一个与另一个重叠执行,把它的耗时隐藏起来,每帧耗时就会降到原来的约55%,FPS大约能提高80%~90%。