这是我的内核函数,它做的很简单:
&
blockIdx.x
每个__global__
void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
const int p_index = blockIdx.x * size + threadIdx.x;
if ((v_array[threadIdx.x] & 1) == blockIdx.x)
{
compact_array[p_index]= 1;
}
else
{
compact_array[p_index]= 0;
}
}
项都可以过滤偶数和奇数:
1 0 1625730008 32767 1625730024 32767 4197775 0 0 0 4197470 0 0 0 2525809656 32630 1 0 1625729712 32767
但是,每次运行程序时都会产生随机结果,例如
0
让我感到困惑的是,结果不是1
或if
,因为我的else
和#include <iostream>
void print_array(const unsigned int* v_array, int size)
{
for (int i = 0; i < size; ++i)
{
std::cout<<v_array[i]<<" ";
}
std::cout<<std::endl;
}
__global__
void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
const int p_index = blockIdx.x * size + threadIdx.x;
if (true)
{
compact_array[p_index]= 1;
}
else
{
compact_array[p_index]= 0;
}
}
int main(int argc, char const *argv[])
{
unsigned int *d_in;
unsigned int *d_out;
cudaMalloc(&d_in, sizeof(unsigned int) * 10);
cudaMalloc(&d_out, sizeof(unsigned int) * 20);
unsigned int h_array[10] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10
};
cudaMemcpy(d_in, h_array, sizeof(unsigned int) * 10, cudaMemcpyHostToDevice);
g_compact<<<2, 10>>>(h_array, d_out, 10);
unsigned int h_out[20];
cudaMemcpy(h_out, d_out, sizeof(unsigned int) * 20, cudaMemcpyDeviceToHost);
print_array(h_out, 20);
return 0;
}
应涵盖所有情况。
有人可以帮我解决这个问题吗?
总计划:
Generic IOS device
答案 0 :(得分:1)
问题不在于你如何编写内核函数,而是如何调用它:
unsigned int h_array[10] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10
};
cudaMemcpy(d_in, h_array, sizeof(unsigned int) * 10, cudaMemcpyHostToDevice);
g_compact<<<2, 10>>>(h_array, d_out, 10);
将主机指针(h_array
)传递给内核函数。它是如何工作的?
我认为您要将h_array
更改为d_in
g_compact<<<2, 10>>>(d_in, d_out, 10);