Cuda的全局功能无法正确交换全局内存数据
我想沿上下方向镜像翻转矩阵,但是 global 函数会将数据重置为零,我不知道为什么?
我编写了一个名为“交换”的设备函数,但是它确实有效吗?
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cufft.h"
#include <iostream>
using namespace std;
// avoid memory leaking
struct cudaMemGuard {
cudaMemGuard(void** memPtr) :m_ptr(memPtr) { }
~cudaMemGuard() {
cudaFree(*m_ptr);
}
private:
void **m_ptr = nullptr;
};
#define checkCudaErrors(a) do{if((a)!=cudaSuccess){cudaError_t errCode=cudaGetLastError(); \
fprintf(stderr,"Cuda errors: %s, File: %s, Line: %d", \
cudaGetErrorString(errCode), __FILE__, __LINE__);cudaDeviceReset();return errCode;}}while(0);
__device__ void Swap(double& x1, double &x2) {
double temp = x1;
x1 = x2;
x2 = temp;
}
__global__ void fftShiftKernel(double *data, int rows, int cols) {
unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
unsigned idy = blockIdx.y*blockDim.y + threadIdx.y;
unsigned index = idy * blockDim.x*gridDim.x + idx;
if ((rows & 1) == 0) {
unsigned gapX = cols / 2, gapY = rows / 2;
if (idy < gapX) {
unsigned i = index, j = index + gapY * cols;
//Swap(data[i], data[j]); // It does work!
double temp = data[j];
data[j] = data[i];
data[i] = temp; // when this line executed, data[j] would be reset to 0 concurrently, why? But if I use the Swap function, it does work, I'm confused!
}
}
}
cudaError_t fftShift(double *data, double *result, int rows, int cols) {
double *devDataPtr;
cudaMemGuard devDataPtrG((void**)&devDataPtr);
cudaSetDevice(0);
unsigned byteNum = sizeof(double)*rows*cols;
checkCudaErrors(cudaMalloc((void**)&devDataPtr, byteNum));
checkCudaErrors(cudaMemcpy(devDataPtr, data, byteNum, cudaMemcpyHostToDevice));
dim3 threadsPerBlock(16, 16, 1);
dim3 blocksPerGrid((rows + 15) / 16, (cols + 15) / 16);
fftShiftKernel << <blocksPerGrid, threadsPerBlock >> > (devDataPtr, rows, cols);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaMemcpy(result, devDataPtr, byteNum, cudaMemcpyDeviceToHost));
cudaDeviceSynchronize();
cudaDeviceReset();
return cudaSuccess;
}
int main()
{
int rows = 6, cols = 5;
double *data = new double[rows*cols];
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
data[i*cols + j] = (double)i*cols + j; //{ (double)i*cols + j + 1,(double)i*cols + j + 1 };
cout << data[i*cols + j] << " ";
}
cout << endl;
}
cout << endl;
double *result = new double[rows*cols];
fftShift(data, result, rows, cols);
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
cout << result[i*cols + j] << " ";
}
cout << endl;
}
return 0;
}
我想在上下方向镜像翻转矩阵,但是在下半部分输出为零。我发现当执行“ double temp = data [j]; data [j] = data [i]; data [i] = temp;”时, global 函数将在执行最后一行代码后将数据重置为零,我不知道为什么?