我创建了一个C#程序,它使用managedCUDA来计算许多“行星”或“球”之间的物体相互作用。我使用单个float和int计算作为测试使CUDA正常工作,但现在使用数组,它似乎无法正常工作。我在C#程序和内核中都有相同的结构:
struct Ball
{
float2 position;
float2 velocity;
float mass;
};
这是我在C#程序中用于初始化内核的代码:
//initializes the CUDA context
cuda = new CudaContext();
//Loads the two kernels, velocity calculation and positions updating according to the velocity
UpdateBallGravity = cuda.LoadKernel("kernel.ptx", "UpdateBallGravity");
UpdateBallPosition = cuda.LoadKernel("kernel.ptx", "UpdateBallPosition");
//allocates gpu memory for a new Ball[] and copies it
d_balls = new Ball[1024];
//generates new balls on the gpu memory
Random random = new Random();
for (int i = 0; i < d_balls.Size; i++)
{
d_balls[i] = new Ball(
(float)random.NextDouble() * ClientSize.X,
(float)random.NextDouble() * ClientSize.Y,
(float)random.NextDouble() * 20000);
}
当我要渲染时,我设置一个断点来检查来自gpu的值,并发现在更新球的速度和位置后,我在每个球的位置和速度成员中得到NaN。质量不会改变,因为我没有在内核中修改它。以下是两个内核:
__global__ void UpdateBallGravity(Ball *balls, int ballCount, float gravityInfluence)
{
int idx = getGlobalIdx_3D_3D();
if (idx >= ballCount)
return;
float2 gravity = float2();
for (int i = 0; i < ballCount; i++)
{
if (i == idx)
continue;
Ball remote = balls[i];
float2 difference = make_float2(remote.position.x - balls[idx].position.x, remote.position.y - balls[idx].position.y);
float f = (balls[idx].mass + remote.mass) / lengthSquared2f(difference);
gravity.y += difference.y*f;
}
balls[idx].velocity.x += gravity.x*gravityInfluence;
balls[idx].velocity.y += gravity.y*gravityInfluence;
}
__global__ void UpdateBallPosition(Ball *balls, int ballCount)
{
int idx = getGlobalIdx_3D_3D();
if (idx >= ballCount)
return;
balls[idx].position.x += balls[idx].velocity.x;
balls[idx].position.y += balls[idx].velocity.y;
}