CUDA程序返回随机结果

时间:2016-12-20 12:23:35

标签: c cuda

这是我的内核函数,它做的事情很简单:用 & 和 blockIdx.x 处理每一项,从而筛选出偶数和奇数:

__global__ void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
    const int p_index = blockIdx.x * size + threadIdx.x;
    if ((v_array[threadIdx.x] & 1) == blockIdx.x) {
        compact_array[p_index]= 1;
    } else {
        compact_array[p_index]= 0;
    }
}

但是,每次运行程序时都会产生随机结果,例如:

1  0  1625730008  32767  1625730024  32767  4197775  0  0  0  4197470  0  0  0  2525809656  32630  1  0  1625729712  32767

让我感到困惑的是,结果既不是 1 也不是 0,而我的 if 和 else 应该已经涵盖了所有情况。

有人可以帮我解决这个问题吗?

完整程序:

#include <iostream>

void print_array(const unsigned int* v_array, int size)
{
    for (int i = 0; i < size; ++i) {
        std::cout<<v_array[i]<<" ";
    }
    std::cout<<std::endl;
}

__global__ void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
    const int p_index = blockIdx.x * size + threadIdx.x;
    if (true) {
        compact_array[p_index]= 1;
    } else {
        compact_array[p_index]= 0;
    }
}

int main(int argc, char const *argv[])
{
    unsigned int *d_in;
    unsigned int *d_out;
    cudaMalloc(&d_in, sizeof(unsigned int) * 10);
    cudaMalloc(&d_out, sizeof(unsigned int) * 20);

    unsigned int h_array[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    cudaMemcpy(d_in, h_array, sizeof(unsigned int) * 10, cudaMemcpyHostToDevice);

    g_compact<<<2, 10>>>(h_array, d_out, 10);

    unsigned int h_out[20];
    cudaMemcpy(h_out, d_out, sizeof(unsigned int) * 20, cudaMemcpyDeviceToHost);

    print_array(h_out, 20);
    return 0;
}

1 个答案:

答案 0 :(得分:1)

问题不在于你如何编写内核函数,而是如何调用它:

// Host-side array holding the ten input values.
unsigned int h_array[10] = {
    1, 2, 3, 4,
    5, 6, 7, 8,
    9, 10
};

// Copy the ten values from h_array into d_in (host-to-device copy).
cudaMemcpy(d_in, h_array, sizeof(unsigned int) * 10, cudaMemcpyHostToDevice);

// NOTE(review): the launch receives h_array, the source of the copy above, rather than d_in,
// its destination; this is the call the answer identifies as the problem.
g_compact<<<2, 10>>>(h_array, d_out, 10);

您把主机指针(h_array)传给了内核函数,这怎么可能正常工作呢?

我认为您需要把 h_array 改为 d_in:

// Corrected launch: pass d_in, the destination of the earlier host-to-device cudaMemcpy, instead of h_array.
g_compact<<<2, 10>>>(d_in, d_out, 10);