Question

我用两种不同的写法给数组 c = {0,0,0,0,0} 的每个元素加 1：一种是普通的加法赋值，另一种使用 atomicAdd()。

c [i] = c [i] + 1;

结果 - c = {1,1,1,1,1}

c[i] = atomicAdd(&(c[i]), 1);

结果c = {0,0,0,0,0}

我完全不知道为什么我会得到这样的结果，这是我用来获得结果的小代码。

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include<windows.h>

void addWithCuda(int *c, int size);

/*
 * Adds 1 to each of the first `size` elements of `c`.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles the element at its
 * global thread index; threads whose index is >= size do nothing.
 * No shared memory is used.
 */
__global__ void addKernel(int *c, int size)
{
    // Global index: identical to threadIdx.x for a single-block launch, and
    // still correct if the kernel is ever launched with more than one block.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < size) {
        c[i] = c[i] + 1;
        // Variant under investigation (leaves every element at 0):
        //     c[i] = atomicAdd(&(c[i]),(int)1);
        // atomicAdd already updates c[i] in place and returns the value it
        // held BEFORE the addition, so assigning that result back overwrites
        // the sum with the old value. The atomic form should be just:
        //     atomicAdd(&(c[i]), 1);
    }
}

/*
 * Entry point: runs addWithCuda over a five-element, zero-initialised host
 * array, waits via Sleep(3000), then prints the five elements.
 */
int main()
{
    const int kCount = 5;
    int values[kCount] = {0};  // remaining elements are zero-initialised too

    // Hand the host array to the GPU helper; it updates the array in place.
    addWithCuda(values, kCount);

    Sleep(3000);

    const int *v = values;
    printf("result = {%d,%d,%d,%d,%d}\n", v[0], v[1], v[2], v[3], v[4]);
    return 0;
}

// Helper that runs addKernel over a host array on the GPU.
//
// Copies `size` ints from the host array `c` into a device buffer, launches
// addKernel with one block of `size` threads, and copies the result back into
// `c`. On any CUDA failure a message is written to stderr, the remaining steps
// are skipped (so `c` may be left unmodified), and the device buffer is freed.
//
// c    - host array holding at least `size` ints; updated in place on success.
// size - number of elements. 0 is a no-op; negative values are rejected.
//        Because a single block is launched, `size` must not exceed the
//        device's maximum threads per block, otherwise the launch fails and
//        the failure is reported.
void addWithCuda(int *c, int size)
{
    int *dev_c = NULL;
    cudaError_t cudaStatus;

    if (size == 0) {
        return;  // nothing to process; a zero-thread launch would be invalid
    }
    if (c == NULL || size < 0) {
        fprintf(stderr, "addWithCuda: invalid arguments (null array or negative size)\n");
        return;
    }

    // Declared before the first goto so no jump bypasses its initialisation.
    const size_t bytes = (size_t)size * sizeof(int);

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed? (%s)\n",
                cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

    // Allocate the single device buffer that is used as both input and output.
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed! (%s)\n", cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

    // Copy the input array from host memory to the device buffer.
    cudaStatus = cudaMemcpy(dev_c, c, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy (host to device) failed! (%s)\n", cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, size);

    // A kernel launch returns no status itself; launch-configuration errors
    // (e.g. too many threads per block) are only visible via cudaGetLastError.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed! (%s)\n", cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during its execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel! (%s)\n",
                cudaStatus, cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy (device to host) failed! (%s)\n", cudaGetErrorString(cudaStatus));
        goto cleanup;
    }

cleanup:
    // Release the device buffer on every path; cudaFree(NULL) is a no-op.
    cudaStatus = cudaFree(dev_c);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaFree failed! (%s)\n", cudaGetErrorString(cudaStatus));
    }
}

Answer 1

c[i] = atomicAdd(&(c[i]),(int)1);

应该是

atomicAdd(&(c[i]),(int)1);

&(c[i]) 传入的是数组元素的地址，atomicAdd 会直接在该地址上原地加 1，因此不需要再赋值。atomicAdd 的返回值是相加之前的旧值（这里是 0）；你又把这个 0 写回了 c[i]，覆盖了刚刚加好的结果，所以数组里全是 0。

CUDA atomicAdd（）给出了错误的结果

1 个答案: