这是我的代码
using namespace std;
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#define N 8000
void fillArray(int *data, int count){
for(int i =0; i < count; i++)
data[i] = (int) rand() / ((int) RAND_MAX);
}
__global__ void add(int* a, int *b){
int add = 0;
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if(tid < N){
add = a[tid] + b[tid];
}
}
__global__ void subtract(int* a, int *b){
int subtract = 0;
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if(tid < N){
subtract = a[tid] - b[tid];
}
}
float duration(int *devA, int *devB, int blocksPerGrid, int threadsPerBlock){
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start,0);
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaMalloc((void**) &devA, N * sizeof(int));
cudaMalloc((void**) &devB, N * sizeof(int));
add<<<blocksPerGrid, threadsPerBlock>>>(devA,devB);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime,start,stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
return elapsedTime;
}
int main(void) {
int *a = new int(N);
int *b = new int(N);
float dur = 0 ;
fillArray(a, N);
fillArray(b, N);
dur = duration(a,b,N,1);
cout << "Global memory version:\n";
cout << "Process completed in " << dur;
cout << "for a data set of " << N << " integers.";
return 0;
}
正如您所看到的,我在CPU端填充了fillArray函数。但填充数组函数给出错误:
malloc.c 3906 : sYSMalloc: Assertion bla bla
我在这里缺少什么?我只是尝试填充数组。我可能会遇到什么问题?事件,如果我删除持续时间功能中的添加功能,我得到此错误。这有什么问题?
答案 0 :(得分:2)
错误在于创建a
和b
数组。正如@QWR和@talonmies所说,使用valgrind
(或任何Windows substitute)可以帮助您找到此类错误的来源:
==8288== Invalid write of size 4
==8288== at 0x400DD2: fillArray(int*, int) (kernel.cu:11)
==8288== by 0x400F79: main (kernel.cu:63)
==8288== Address 0x62783e4 is 0 bytes after a block of size 4 alloc'd
==8288== at 0x4C2BA77: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==8288== by 0x400F41: main (kernel.cu:57)
==8288==
==8288== Invalid write of size 4
==8288== at 0x400DD2: fillArray(int*, int) (kernel.cu:11)
==8288== by 0x400F8A: main (kernel.cu:64)
==8288== Address 0x6278434 is 0 bytes after a block of size 4 alloc'd
==8288== at 0x4C2BA77: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==8288== by 0x400F55: main (kernel.cu:58)
如果你改变:
int *a = new int(N);
int *b = new int(N);
要:
int *a = new int[N];
int *b = new int[N];
错误消失。实际上,你没有为数组分配内存,只是为了一个整数。
调试CUDA代码时,需要使用GPU /设备调试工具(cuda-memcheck
,cuda-gdb
)和CPU /主机工具(valgrind
),因为错误可能发生在GPU和CPU都有。不要忘记使用nvcc
:-G
的两个调试标志进行编译以获取设备代码,并使用-g
进行编译以获取主机代码。
对于good practice,您还应该在主页末尾delete
使用数组:
delete [] a;
delete [] b;