CUDA中x方向的Sobel滤波器

时间:2012-06-18 21:21:30

标签: cuda textures

我正在尝试在每个像素的x方向上对灰度图像应用sobel滤镜并显示结果。 X方向索贝尔滤波器是: -

-1 0 1   
-2 0 2  
-1 0 1   

我没有得到所需的结果。有人可以指出我的错误吗?我正在尝试使用纹理,我不确定我是否正确使用了它:

#include <cuda.h>
#include<iostream>
using namespace std;
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
texture <float,2,cudaReadModeElementType> tex1;
//Kernel for x direction sobel
__global__ void implement_x_sobel(float* garbage,float* output,int width,int height,int              widthStep)
{
int x=blockIdx.x*blockDim.x+threadIdx.x;
int y=blockIdx.y*blockDim.y+threadIdx.y;

float output_value=((0*tex2D(tex1,x,y))+(2*tex2D(tex1,x+1,y))+(-2*tex2D(tex1,x-  1,y))+(0*tex2D(tex1,x,y+1))+(1*tex2D(tex1,x+1,y+1))+(-1*tex2D(tex1,x-1,y+1))+  (1*tex2D(tex1,x+1,y-1))+(0*tex2D(tex1,x,y-1))+(-1*tex2D(tex1,x-1,y-1)));
output[y*widthStep+x]=output_value;
}
//Kernel for y direction sobel
//__global__ void implement_y_sobel(float* input,float* output,int width,int height,int widthStep)
//{

//}
//Host Code
 inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
if ( cudaSuccess != err )
{
    printf("cudaSafeCall() failed at %s:%i : %s\n",
             file, line, cudaGetErrorString( err ) );
    exit( -1 );
}    
#endif

return;
}

inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if ( cudaSuccess != err )
{
    printf("cudaCheckError() failed at %s:%i : %s\n",
             file, line, cudaGetErrorString( err ) );
   exit( -1 );
}
#endif

return;
}



void sobel(float* input,float* output,int width,int height,int widthStep)
{
cudaChannelFormatDesc     channelDesc=cudaCreateChannelDesc(32,32,0,0,cudaChannelFormatKindFloat);
cudaArray * cuArray;
CudaSafeCall(cudaMallocArray(&cuArray,&channelDesc,width,height));
cudaMemcpyToArray(cuArray,0,0,input,widthStep*height,cudaMemcpyHostToDevice);
tex1.addressMode[0]=cudaAddressModeClamp;
tex1.addressMode[1]=cudaAddressModeClamp;
tex1.filterMode=cudaFilterModeLinear;
tex1.normalized=false;
cudaBindTextureToArray(tex1,cuArray,channelDesc);
float * D_output_x;
float * garbage=NULL;
CudaSafeCall(cudaMalloc(&D_output_x,widthStep*height)); 
dim3 blocksize(16,16);
dim3 gridsize;
gridsize.x=(width+blocksize.x-1)/blocksize.x;
gridsize.y=(height+blocksize.y-1)/blocksize.y;

//kernel call
implement_x_sobel<<<gridsize,blocksize>>>(garbage,D_output_x,width,height,widthStep/sizeof(float));
cudaThreadSynchronize();
CudaCheckError();
CudaSafeCall(cudaMemcpy(output,D_output_x,height*widthStep,cudaMemcpyDeviceToHost));
cudaFree(D_output_x);
cudaFree(garbage);
cudaFreeArray(cuArray);
}

我的主要档案: -

#include<iostream>
#include <stdio.h>
#include <stdlib.h>
#include<opencv/highgui.h>
#include<opencv/cv.h>
#include"header.h"
using namespace std;
void main()
{
IplImage* img1=cvLoadImage("C://test.jpg",CV_LOAD_IMAGE_GRAYSCALE); 
if( !img1) {
           printf("ERROR: couldnt load file!\n");
           }
IplImage* img2=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
IplImage* img3=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
unsigned char * pseudo_input=(unsigned char *)img1->imageData;
float * output=(float*)img2->imageData;
float *input=(float*)img3->imageData;
int s=img1->widthStep/sizeof(float);
for(int w=0;w<=(img1->height);w++)
    for(int h=0;h<(img1->width*img1->nChannels);h++)
    {
        input[w*s+h]= pseudo_input[w*s+h];
            }

sobel(input,output,img1->width,img1->height,img1->widthStep);
cvShowImage("Original Image",img1);
cvShowImage("Sobeled Image",img2);
cvWaitKey(0);
    }}

2 个答案:

答案 0 :(得分:1)

cudaCreateChannelDesc期望前4个参数为x,y,z和w分量的位数。 float纹理应为32。

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

答案 1 :(得分:1)

如果没有更多信息,很难诊断问题。如果你没有得到有意义的输出(例如纹理读取全0),这意味着纹理设置或绑定有问题。

如果你稍微离开,那可能是因为你需要将坐标偏移0.5f,而当你在它时,要更加谨慎地将你的int显式转换为浮点数。如果在调用tex2D()之前声明并赋值float-values变量,代码将不会运行得更慢。