我无法在这个简短的cuda代码中获得分段错误的来源。 我用它来测试Thrust库与STL库的排序速度 排序整数。我传递双精度数组的大小作为命令行排序 论点。
这是代码
inline void check_cuda_error(char *message)
{
cudaThreadSynchronize();
cudaError_t error = cudaGetLastError();
if(error != cudaSuccess)
{
printf("CUDA error after %s: %s\n", message, cudaGetErrorString(error));
}
}
int main(int argc, char *argv[])
{
int N = atoi(argv[1]);
double* h = new double[N];
for (int i = 0; i < N; ++i)
{
h[i] = (double)rand()/RAND_MAX; //std::cout << h[i] << " " ;
}
clock_t start , stop;
std::cout << std::endl;
// Start timing
start = clock();
std::sort(h, h+N);
stop = clock();
std::cout << "Host sorting took " << (stop - start) /(double)CLOCKS_PER_SEC << std::endl ;
// Start the GPU work. Initialize to random numbers again.
for (int i = 0; i < N; ++i)
{
h[i] = (double)rand()/RAND_MAX; //std::cout << h[i] << " " ;
}
double* d = 0;
const size_t num_bytes = N * sizeof( double );
cudaMalloc((void**)&d, num_bytes);
check_cuda_error("Memory Allocation");
cudaMemcpy(d ,h , N * sizeof(double), cudaMemcpyHostToDevice); // Transfer data
thrust::sort( d, d+ N ) ;
return 0;
}
我收到以下错误
[BeamerLatex/Farber]$ nvcc -arch=sm_20 sortcompare.cu ; ./a.out 16777216
Host sorting took 3.77
[1] 4661 segmentation fault ./a.out 16777216
[BeamerLatex/Farber]$
答案 0 :(得分:2)
似乎你无法在原始指针上运行thrust :: sort,你需要首先将其强制转换为device_ptr
,即:
thrust::device_ptr< double > dv = thrust::device_pointer_cast(d);
thrust::sort( dv, dv+ NN ) ;
这对我来说很好。