如何在CUDA中更改全局和设备函数中的__device__变量?

时间:2011-07-08 11:42:58

标签: cuda

程序描述如下:

#include <cuda.h> 
#include <cutil_math>  
#include <cuda_runtime.h>  
#include <iostream>

// Small plain-data record; main() fills one on the host and copies it
// to and from a device buffer of sizeof(testtype) bytes.
struct testtype {
    float x;  // floating-point member
    int   y;  // integer member; the one test() writes and main() prints
    char  z;  // single-character member
};

 __device__ testtype *gpu_config;  // device-side pointer that test() dereferences

// Kernel: stores 3 in the y member of the struct that gpu_config points to.
// Every launched thread performs the same store; it takes no arguments and
// relies entirely on the device-side value of gpu_config.
__global__ void test()
{
    gpu_config->y = 3;
}

// Host entry point of the question's example: fills a testtype on the host,
// copies it into a freshly allocated device buffer, launches test() with one
// thread, copies the struct back into val and prints val.y.
// Returns -1 if the allocation fails.
int main(void)  
{  
testtype cpu_config;

cpu_config.x = 1;
// y is an int, so the float literal is converted to 2 on assignment.
cpu_config.y = 2.0f;
cpu_config.z = 'A';

// Receives the struct copied back from the device; not initialized here.
testtype val ;

// NOTE(review): &gpu_config is taken in host code, so cudaMalloc stores the
// new address in the host-side view of the __device__ symbol. The device-side
// gpu_config that test() dereferences is not assigned anywhere in this
// program. The usual pattern is to cudaMalloc into an ordinary host pointer
// and then publish it with cudaMemcpyToSymbol(gpu_config, &ptr, sizeof(ptr)).
if (cudaMalloc((void**) &gpu_config, sizeof(testtype)) != cudaSuccess)
{
    return -1;
}

// Host -> device copy of the whole struct; the return code is not checked.
cudaMemcpy(gpu_config, &cpu_config, sizeof(testtype), cudaMemcpyHostToDevice);

// One block of one thread, 0 bytes of dynamic shared memory.
// No cudaGetLastError()/synchronize check follows the launch, so a failing
// kernel goes unnoticed at this point.
test<<<1,1,0>>>();

// Device -> host copy into val; the return code is not checked, so if this
// copy fails val keeps whatever it held before.
cudaMemcpy(&val, gpu_config, sizeof(testtype), cudaMemcpyDeviceToHost);

std::cout << val.y << std::endl;
}   

当我删除 `test<<<1,1,0>>>();` 这一行时,val 的值与拷贝到 gpu_config 中的内容一致。但是当保留 `test<<<1,1,0>>>();` 这一行时,结果并不符合预期,这意味着全局函数 test 并没有改变 val 的值。我想知道如何通过全局函数更改 `__device__` 变量的值。

1 个答案:

答案 0 :(得分:1)

#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cutil_math.h>
#include <cuda_runtime.h>

// Check a CUDA runtime call.
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints
// "<file>(<line>) : <error string>." to stderr and terminates the process
// with EXIT_FAILURE.
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement and stays correct inside an unbraced if/else. The argument is
// parenthesized, and the local uses a trailing underscore so it cannot shadow
// a variable named `err` that appears in the caller's expression.
#define cudaSafeCall(call) do {                     \
  cudaError_t safeCallErr_ = (call);                \
  if(cudaSuccess != safeCallErr_){                  \
    fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__, cudaGetErrorString(safeCallErr_));   \
    exit(EXIT_FAILURE);                             \
  }                                                 \
} while (0)

// Check for a pending CUDA error (typically right after a kernel launch).
// Reads and clears the runtime's last error via cudaGetLastError(). If it is
// not cudaSuccess, prints "<file>(<line>) : <errorMessage> : <error string>."
// to stderr and terminates the process with EXIT_FAILURE.
// errorMessage must be usable as a "%s" argument.
// Wrapped in do { ... } while (0) so the macro is a single statement and is
// safe inside an unbraced if/else; the local uses a trailing underscore to
// avoid shadowing a caller variable named `err`.
#define cudaCheckErr(errorMessage) do {             \
  cudaError_t checkErr_ = cudaGetLastError();       \
  if(cudaSuccess != checkErr_){                     \
    fprintf(stderr, "%s(%i) : %s : %s.\n", __FILE__, __LINE__, (errorMessage), cudaGetErrorString(checkErr_)); \
    exit(EXIT_FAILURE);                             \
  }                                                 \
} while (0)

// Element type of the device buffer; carries one integer payload.
struct g
{
  int m;  // written by kernel(), printed by main()
};
// Device-side pointer to an array of g. main() sets it with
// cudaMemcpyToSymbol before launching the kernel.
__device__ struct g *d;

// Kernel: each thread writes 10 into the m field of the element of d selected
// by its flat 1-D global index. There is no bounds check, so the launch must
// not create more threads than d has elements.
__global__ void kernel()
{
  const int idx = threadIdx.x + blockDim.x * blockIdx.x;
  struct g *slot = d + idx;
  slot->m = 10;
}

// Host entry point of the answer's example.
// Allocates one struct g in device memory, stores that address in the
// __device__ pointer d, launches kernel() with a single thread, copies the
// element back to the host and prints its m field.
// Any CUDA or malloc failure terminates the process with EXIT_FAILURE;
// otherwise both buffers are released and 0 is returned.
int main()
{
  const size_t size = 1 * sizeof(struct g);    // bytes in the element buffer
  const size_t sizep = 1 * sizeof(struct g*);  // bytes of the pointer stored in d
  struct g *ld = NULL; // device (local): device address kept in a host variable

  cudaSafeCall(cudaMalloc(&ld, size));
  // Copy the pointer VALUE into the device symbol so the kernel sees it.
  cudaSafeCall(cudaMemcpyToSymbol(d,&ld,sizep));

  kernel<<<1,1>>>();
  // Launch-time errors are reported through cudaGetLastError(), so check it
  // right after the launch; errors raised while the kernel runs surface at
  // the synchronize that follows.
  cudaCheckErr("kernel error");
  cudaSafeCall(cudaDeviceSynchronize());

  struct g *h = (struct g*)malloc(size);
  if(h==NULL){
    fprintf(stderr, "%s(%i) : malloc error.\n", __FILE__, __LINE__);
    cudaFree(ld); // best-effort release before exiting
    exit(EXIT_FAILURE);
  }

  // Reading the pointer back with cudaMemcpyFromSymbol(&ld,d,sizep) is not
  // necessary: the host already holds the same address in ld.
  cudaSafeCall(cudaMemcpy(h, ld, size, cudaMemcpyDeviceToHost));
  printf("Result: %d\n",h[0].m);

  // Release the host and device buffers that the original version leaked.
  free(h);
  cudaSafeCall(cudaFree(ld));
  return 0;
}