我正在尝试在每个像素的x方向上对灰度图像应用sobel滤镜并显示结果。 X方向索贝尔滤波器是: -
-1 0 1
-2 0 2
-1 0 1
我没有得到所需的结果。有人可以指出我的错误吗?我正在尝试使用纹理,我不确定我是否正确使用了它:
#include <cuda.h>
#include<iostream>
using namespace std;
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
texture <float,2,cudaReadModeElementType> tex1;
//Kernel for x direction sobel
__global__ void implement_x_sobel(float* garbage,float* output,int width,int height,int widthStep)
{
int x=blockIdx.x*blockDim.x+threadIdx.x;
int y=blockIdx.y*blockDim.y+threadIdx.y;
float output_value=((0*tex2D(tex1,x,y))+(2*tex2D(tex1,x+1,y))+(-2*tex2D(tex1,x- 1,y))+(0*tex2D(tex1,x,y+1))+(1*tex2D(tex1,x+1,y+1))+(-1*tex2D(tex1,x-1,y+1))+ (1*tex2D(tex1,x+1,y-1))+(0*tex2D(tex1,x,y-1))+(-1*tex2D(tex1,x-1,y-1)));
output[y*widthStep+x]=output_value;
}
//Kernel for y direction sobel
//__global__ void implement_y_sobel(float* input,float* output,int width,int height,int widthStep)
//{
//}
//Host Code
inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
if ( cudaSuccess != err )
{
printf("cudaSafeCall() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if ( cudaSuccess != err )
{
printf("cudaCheckError() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
void sobel(float* input,float* output,int width,int height,int widthStep)
{
cudaChannelFormatDesc channelDesc=cudaCreateChannelDesc(32,32,0,0,cudaChannelFormatKindFloat);
cudaArray * cuArray;
CudaSafeCall(cudaMallocArray(&cuArray,&channelDesc,width,height));
cudaMemcpyToArray(cuArray,0,0,input,widthStep*height,cudaMemcpyHostToDevice);
tex1.addressMode[0]=cudaAddressModeClamp;
tex1.addressMode[1]=cudaAddressModeClamp;
tex1.filterMode=cudaFilterModeLinear;
tex1.normalized=false;
cudaBindTextureToArray(tex1,cuArray,channelDesc);
float * D_output_x;
float * garbage=NULL;
CudaSafeCall(cudaMalloc(&D_output_x,widthStep*height));
dim3 blocksize(16,16);
dim3 gridsize;
gridsize.x=(width+blocksize.x-1)/blocksize.x;
gridsize.y=(height+blocksize.y-1)/blocksize.y;
//kernel call
implement_x_sobel<<<gridsize,blocksize>>>(garbage,D_output_x,width,height,widthStep/sizeof(float));
cudaThreadSynchronize();
CudaCheckError();
CudaSafeCall(cudaMemcpy(output,D_output_x,height*widthStep,cudaMemcpyDeviceToHost));
cudaFree(D_output_x);
cudaFree(garbage);
cudaFreeArray(cuArray);
}
我的主要档案: -
#include<iostream>
#include <stdio.h>
#include <stdlib.h>
#include<opencv/highgui.h>
#include<opencv/cv.h>
#include"header.h"
using namespace std;
void main()
{
IplImage* img1=cvLoadImage("C://test.jpg",CV_LOAD_IMAGE_GRAYSCALE);
if( !img1) {
printf("ERROR: couldnt load file!\n");
}
IplImage* img2=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
IplImage* img3=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
unsigned char * pseudo_input=(unsigned char *)img1->imageData;
float * output=(float*)img2->imageData;
float *input=(float*)img3->imageData;
int s=img1->widthStep/sizeof(float);
for(int w=0;w<=(img1->height);w++)
for(int h=0;h<(img1->width*img1->nChannels);h++)
{
input[w*s+h]= pseudo_input[w*s+h];
}
sobel(input,output,img1->width,img1->height,img1->widthStep);
cvShowImage("Original Image",img1);
cvShowImage("Sobeled Image",img2);
cvWaitKey(0);
}}
答案 0 :(得分:1)
cudaCreateChannelDesc
期望前4个参数为x,y,z和w分量的位数。 float
纹理应为32。
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
答案 1 :(得分:1)
如果没有更多信息,很难诊断问题。如果你没有得到有意义的输出(例如纹理读取全0),这意味着纹理设置或绑定有问题。
如果你稍微离开,那可能是因为你需要将坐标偏移0.5f,而当你在它时,要更加谨慎地将你的int显式转换为浮点数。如果在调用tex2D()之前声明并赋值float-values变量,代码将不会运行得更慢。