将设备指针传递给cublasSnrm2时出现分段错误

时间:2014-05-07 07:49:54

标签: cuda cublas

下面的cublas代码在调用“cublasSnrm2(handle,row,dy,incy,de)”时出现段错误并产生核心转储(core dumped),你能给出一些建议吗?

main.cu

#include <iostream>
#include "cublas.h"
#include "cublas_v2.h"
#include "helper_cuda.h"

using namespace std;

// Minimal reproduction: copies a 10-element vector of ones to the GPU and
// asks cuBLAS (v2 handle API) for its Euclidean norm via cublasSnrm2, then
// copies the scalar result back and prints it.
//
// Known defect (this is what the question is about): the result argument
// passed to cublasSnrm2 is the device pointer `de`, but no call to
// cublasSetPointerMode is made, so the handle is still in its default host
// pointer mode and the call crashes.
//
// argc/args are accepted but never used.
int main(int argc,char *args[])
{
    // Host input vector; the intended result is the 2-norm of ten ones.
    float y[10] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; 

    int dev=0;
    checkCudaErrors(cudaSetDevice(dev));

    // cuBLAS initialisation.
    // NOTE(review): cublasInit()/cublasShutdown() and the cublasStatus type
    // appear to come from the legacy "cublas.h" API, while cublasCreate and
    // cublasHandle_t come from "cublas_v2.h" -- mixing the two is presumably
    // unnecessary; confirm and keep only the v2 API.
    cublasStatus stat;
    cublasInit();

    cublasHandle_t  handle;
    stat = cublasCreate(&handle);
    if (stat !=CUBLAS_STATUS_SUCCESS)
    {
       printf("cublas handle create failed!\n");
       cublasShutdown();
       // NOTE(review): there is no return here, so execution continues with
       // an invalid handle after reporting the failure.
    }   
   // dy: device copy of y, de: device scalar for the result,
   // e: host scalar that receives the result.
   float * dy,*de,*e;
   int incy = 1,ONE = 1,row = 10; 
   // NOTE(review): the malloc result is not checked before e[0] is written.
   e = (float *)malloc(sizeof(float)*ONE);
   e[0]=0.0f;

   checkCudaErrors(cudaMalloc(&dy,sizeof(float)*row));
   checkCudaErrors(cudaMalloc(&de,sizeof(float)*ONE));
  checkCudaErrors(cudaMemcpy(dy,y,row*sizeof(float),cudaMemcpyHostToDevice));
  checkCudaErrors(cudaMemcpy(de,e,ONE*sizeof(float),cudaMemcpyHostToDevice));
   // BUG: `de` is device memory, but the handle is in the default (host)
   // pointer mode, so cuBLAS treats the last argument as a host pointer.
   // Either pass the host pointer `e` here, or call
   // cublasSetPointerMode(handle,CUBLAS_POINTER_MODE_DEVICE) first.
   stat = cublasSnrm2(handle,row,dy,incy,de);
   if (stat !=CUBLAS_STATUS_SUCCESS)
   {
       printf("norm2 compute failed!\n");
       cublasShutdown();
       // NOTE(review): as above, failure is reported but execution continues.
   }
   // Blocking device-to-host copy of the scalar result before printing it.
   checkCudaErrors(cudaMemcpy(e,de,ONE*sizeof(float),cudaMemcpyDeviceToHost));
   std::cout<<e[0]<<endl;
   // NOTE(review): dy, de, e and the cuBLAS handle are never released
   // (no cudaFree / free / cublasDestroy is visible in this function).
    return 0;
}

makefile如下:

# Build script for main.cu -> bcg using nvcc.
# NOTE(review): in a real makefile the recipe lines below must start with a
# TAB; the leading spaces here presumably come from the paste -- confirm.

# Root of the CUDA samples tree and of the CUDA toolkit installation.
NVIDIA = $(HOME)/NVIDIA_CUDA-5.0_Samples
CUDA = /usr/local/cuda-5.0
# Include paths: samples' common headers (presumably where helper_cuda.h
# lives -- verify) and the toolkit headers.
NVIDINCADD = -I$(NVIDIA)/common/inc
CUDAINCADD = -I$(CUDA)/include 
# Despite its name, CC holds linker flags (library path and libstdc++),
# not a compiler command.
CC = -L/usr/lib64/ -lstdc++

# GCCOPT and INTELOPT are defined but not referenced by any rule shown here.
GCCOPT = -O2 -fno-rtti -fno-exceptions 
INTELOPT = -O3 -fno-rtti -xW -restrict -fno-alias
# DEB: host debug flag. NVCC: despite its name, holds the nvcc -G flag
# (device-side debug info), not the compiler command.
DEB = -g
NVCC = -G
# Target GPU architecture passed to nvcc.
ARCH = -arch=sm_35

# bcg: compile and link main.cu against cuBLAS into the executable "bcg"
# ($(@) is the target name, $(<) the first prerequisite).
bcg:main.cu
    nvcc $(DEB) $(NVCC)  $(ARCH) $(CC) -lm  $(NVIDINCADD) $(CUDAINCADD) -lcublas -I./ -o $(@) $(<) 
# clean: remove built executables ("hyb" is not built by any rule shown here).
clean:
    rm -f bcg
    rm -f hyb

我的操作系统是linux redhat 6.2,CUDA的版本是5.0,GPU是K20M。

1 个答案:

答案 0 :(得分:4)

问题在于:

// The last argument (de) is a device pointer, but by default cuBLAS
// treats the result argument as a host pointer.
cublasSnrm2(handle,row,dy,incy,de);

默认情况下,cuBLAS 会把最后一个参数(用于存放结果的指针)当作主机指针。因此,要么在调用snrm2时传递主机指针e而不是设备指针de,要么先设置指针模式,如下所示:

// Switch the handle to device pointer mode so that the result argument
// (de) may be a device pointer, then compute the norm.
cublasSetPointerMode(handle,CUBLAS_POINTER_MODE_DEVICE); 
stat = cublasSnrm2(handle,row,dy,incy,de);

如果要传递设备指针以存储结果,则需要将指针模式设置为设备。