我在使用Matlab和CUDA时遇到了一些奇怪的问题。基本上,我有这个CUDA代码打印以下
CUDA代码
#include "cuda_runtime.h
#include "device_launch_parameters.h"
#include <stdio.h>
//device functions
__device__ int getGlobalidx_1d_1d()
{
return blockIdx.x *blockDim.x + threadIdx.x;
}
//kernels
__global__ void kernel_1D_1D()
{
printf("Local thread ID: %i Local Block ID: %i, Local Block Dim: %i, Global Thread ID: %i\n", threadIdx.x, blockIdx.x, blockDim.x, getGlobalIdx_1D_1D());
}
int main()
{
printf("\nLaunching kernel as 1D grid of 1D blocks...\n");
kernel_1D_1D<<<dim3(2,1,1), dim3(10,1,1)>>>();
cudaDeviceReset();
return 0;
}
这一切都很好,很好
http://postimg.org/image/4tllso2tx/
然而,当我在Matlab中做同样的事情时,事情变得棘手
我应该得到一个矩阵,
换句话说
相反,当我提供带有参数2和10的Matlab函数时,值就到处都是
http://postimg.org/image/haiii0cjt/
Matlab功能代码
function[returnValues] = Blocks_Threads_Test(blocks,threads)
%1 Create Cuda Kernel Object
k = parallel.gpu.CUDAKernel('BlocksThreads.ptx', 'BlocksThreads.cu','BlocksThreads');
%2 Set Object properties
k.GridSize = [blocks];
k.ThreadBlockSize = [threads];
%3 Set Argument Variables
gpu_Values = gpuArray(ones(4,blocks*threads));
[data] = feval(k, gpu_Values);
returnValues = gather(data);
Cuda Matlab代码
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
__global__ void BlocksThreads(double *values)
{
unsigned int bx = blockIdx.x;
unsigned int tx = threadIdx.x;
unsigned long int globalID = (bx*blockDim.x) + tx;
values[(bx*blockDim.x) + (tx*4) + 0] = bx; //Block Number
values[(bx*blockDim.x) + (tx*4) + 1] = tx; //Thread Number
values[(bx*blockDim.x) + (tx*4) + 2] = globalID;
values[(bx*blockDim.x) + (tx*4) + 3] = blockDim.x; //Threads/Block
}
有谁知道为什么matlab中的线程,块和全局变量的id值都到位了?更不用说Matrix甚至没有填写整个过程。
如果相关,这就是我正在运行的
答案 0 :(得分:0)
您对values
矩阵的索引编号错误。变化
values[(bx*blockDim.x) + (tx*4) + k] with k=0,1,2,3
到
values[4*globalID + k] with k=0,1,2,3