我在vs2015中使用cuda 9.0。以下是一些导致错误Too few arguments
的cuda代码:
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
这些代码位于函数calcLBPGPU
(https://github.com/nourani/LBP/blob/master/LBPGPU.cu)中,如下所示:
void calcLBPGPU( const unsigned char * h_src, unsigned char * h_dst, const int width, const int height,
const LBPMapping * mapping ) {
unsigned char *d_Src = NULL, *d_Dst = NULL;
cudaMalloc( &d_Src, sizeof(char) * height * width );
cudaMalloc( &d_Dst, sizeof(char) * height * width );
cudaMemset( (void *) d_Dst, 0, sizeof(char) * height * width );
cudaMemcpy( d_Src, h_src, sizeof(char) * height * width, cudaMemcpyHostToDevice );
dim3 numThreadsPerBlock, numBlocks;
numThreadsPerBlock.x = width;
// numThreadsPerBlock.y = height;
numBlocks.x = height;
// numBlocks.y = 1;
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
float time;
cout << "before gpu call" << endl;
cudaEventRecord( start, 0 );
calcLBPKernel<<< numBlocks, numThreadsPerBlock >>>( d_Src, d_Dst, width, height, mapping );
cudaEventRecord( end, 0 );
cudaEventSynchronize( end );
cudaEventElapsedTime( &time, start, end );
cudaEventDestroy( start );
cudaEventDestroy( end );
cout << "after gpu sync. Took " << time / 1000 << "s" << endl;
cudaMemcpy( h_dst, d_Dst, sizeof(char) * height * width, cudaMemcpyDeviceToHost );
cudaFree( d_Src );
cudaFree( d_Dst );
}