Question

我创建了一个C＃程序，它使用managedCUDA来计算许多“行星”或“球”之间的物体相互作用。我使用单个float和int计算作为测试使CUDA正常工作，但现在使用数组，它似乎无法正常工作。我在C＃程序和内核中都有相同的结构：

struct Ball
{
    float2 position;
    float2 velocity;
    float mass;
};

这是我在C＃程序中用于初始化内核的代码：

//initializes the CUDA context
cuda = new CudaContext();

//Loads the two kernels, velocity calculation and positions updating according to the velocity
UpdateBallGravity = cuda.LoadKernel("kernel.ptx", "UpdateBallGravity");
UpdateBallPosition = cuda.LoadKernel("kernel.ptx", "UpdateBallPosition");

//allocates gpu memory for a new Ball[] and copies it
d_balls = new Ball[1024];

//generates new balls on the gpu memory
Random random = new Random();
for (int i = 0; i < d_balls.Size; i++)
{
    d_balls[i] = new Ball(
        (float)random.NextDouble() * ClientSize.X,
        (float)random.NextDouble() * ClientSize.Y,
        (float)random.NextDouble() * 20000);
}

当我要渲染时，我设置一个断点来检查来自gpu的值，并发现在更新球的速度和位置后，我在每个球的位置和速度成员中得到NaN。质量不会改变，因为我没有在内核中修改它。以下是两个内核：

__global__ void UpdateBallGravity(Ball *balls, int ballCount, float gravityInfluence)
{
    int idx = getGlobalIdx_3D_3D();
    if (idx >= ballCount)
        return;
    float2 gravity = float2();
    for (int i = 0; i < ballCount; i++)
    {
        if (i == idx)
            continue;
        Ball remote = balls[i];
        float2 difference = make_float2(remote.position.x - balls[idx].position.x, remote.position.y - balls[idx].position.y);
        float f = (balls[idx].mass + remote.mass) / lengthSquared2f(difference);
        gravity.y += difference.y*f;
    }
    balls[idx].velocity.x += gravity.x*gravityInfluence;
    balls[idx].velocity.y += gravity.y*gravityInfluence;
}

__global__ void UpdateBallPosition(Ball *balls, int ballCount)
{
    int idx = getGlobalIdx_3D_3D();
    if (idx >= ballCount)
        return;
    balls[idx].position.x += balls[idx].velocity.x;
    balls[idx].position.y += balls[idx].velocity.y;
}

CUDA计算后C＃侧的NaN值

0 个答案: