我有几次执行CUDA内核的问题。我的代码中的环境有些不对劲。第一次代码正常工作,在第三次调用之前第二次清理environemnt时会出现随机崩溃。 我认为由于某种原因我有记忆腐败。 CUDA驱动程序中有时会发生崩溃,有时简单的printf崩溃或廉价的kernel32.dll。我想我的代码中存在内存管理问题。
再次执行内核之前应该做些什么?
当我执行一次时,此代码有效。 我正在使用CURAND初始化随机生成器。 这是我的代码:
#define GRID_BLOCK 64
#define GRID_THREAD 8
#define CITIES 100
#define CIPOW2 101
int lenghtPaths = GRID_BLOCK*GRID_THREAD;
int cities = CITIES;
//prepare CURAND
curandState *devStates;
CUDA_CALL(cudaMalloc((void **)&devStates, GRID_BLOCK*GRID_THREAD*sizeof(curandState)));
/* Setup prng states */
setup_kernel<<<GRID_BLOCK ,GRID_THREAD>>>(devStates);
CUDA_CALL(cudaDeviceSynchronize());
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess)
fprintf(stderr, "CURAND preparation failed: %s\n", cudaGetErrorString(cudaStatus));
//copy distance grid to constant memory
cudaMemcpyToSymbol(cdist, dist, sizeof(int) *CIPOW2*CIPOW2);
CUDA_CALL(cudaMalloc((void**)&dev_pathsForThreads, lenghtPaths * cities * sizeof(int)));
CUDA_CALL(cudaMalloc((void**)&d_results, GRID_BLOCK*GRID_THREAD * sizeof(int)));
for (int k = 0; k < 5; k++){
int* pathsForThreads;
pathsForThreads = (int*)malloc(lenghtPaths * cities * sizeof(int));
pathsForThreads = PreaparePaths(Path, lenghtPaths, cities);
CUDA_CALL(cudaMemcpy(dev_pathsForThreads, pathsForThreads, lenghtPaths *cities*sizeof(int), cudaMemcpyHostToDevice));
GPUAnnealing<<<GRID_BLOCK ,GRID_THREAD >>>(dev_pathsForThreads, devStates, iterationLimit,temperature, coolingRate, absoluteTemperature, cities,d_results);
CUDA_CALL(cudaDeviceSynchronize());
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess)
fprintf(stderr, "GPUAnnealing launch failed: %s\n", cudaGetErrorString(cudaStatus));
h_results = (int*) malloc(GRID_BLOCK*GRID_THREAD * sizeof(int));
//Copy lenght of each path to CPU
CUDA_CALL(cudaMemcpy(h_results, d_results, GRID_BLOCK*GRID_THREAD * sizeof(int),cudaMemcpyDeviceToHost));
//Copy paths to CPU
CUDA_CALL(cudaMemcpy(pathsForThreads, dev_pathsForThreads, lenghtPaths *cities*sizeof(int), cudaMemcpyDeviceToHost));
//check the shortest path
shortestPath = FindTheShortestPath(h_results);
fprintf (stdout, "Shortest path on index = %d value = %d \n", shortestPath, h_results[shortestPath]);
for (int i = 0; i < GRID_BLOCK*GRID_BLOCK ; i++)
Path[i] = pathsForThreads[shortestPath*CITIES +i];
free(pathsForThreads);
free(h_results);
}
CUDA_CALL(cudaFree(dev_pathsForThreads));
CUDA_CALL(cudaFree(d_results));
CUDA_CALL(cudaFree(devStates));
CUDA_CALL(cudaDeviceReset());
答案 0 :(得分:1)
这是个坏主意:
pathsForThreads = (int*)malloc(lenghtPaths * cities * sizeof(int));
pathsForThreads = PreaparePaths(Path, lenghtPaths, cities);
如果对PreaparePaths
的调用将pathsForThreads
的某些其他值分配给malloc
操作分配给它的其他值,则稍后执行此操作时
free(pathsForThreads);
你将获得不可预测的结果。
您不应该将您随后要传递给free
的指针重新分配给其他值。免费的手册页指示:
free() frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc() or realloc().
如果您打算将指针传递给free