OpenCL 内核中使用 barrier 后,本地内存中的值不正确

时间:2015-03-24 20:27:22

标签: opencl

我有一个简单的内核,它基本上是从一组双精度数中求出最大值并记录其索引。所有工作都在本地内存(local memory)中完成,工作组大小为 64。

简而言之,我创建了一个索引列表,用连续数字(基于 local_id)填充它,然后在归约过程中根据适应度值的比较结果移动这些索引。

目前我的主要问题是,读取这个索引列表会返回非常疯狂的值。我有一个printf语句,虽然它应该打印这样的东西

>> my_index 1 ; other_index 33 ; local_id 1 <<
>> my_index 2 ; other_index 34 ; local_id 2 <<
>> .......

我得到的输出是

>> my_index 1600085855 ; other_index 32 ; local_id 0 <<
>> my_index 1072652127 ; other_index 33 ; local_id 1 <<
>> my_index 942797699 ; other_index 34 ; local_id 2 <<
>> my_index 1072923423 ; other_index 35 ; local_id 3 <<
>> my_index -987348804 ; other_index 36 ; local_id 4 <<
>> my_index 1072849931 ; other_index 37 ; local_id 5 <<
>> my_index -833351863 ; other_index 38 ; local_id 6 <<
>> my_index 1073209710 ; other_index 39 ; local_id 7 <<
>> my_index -833351863 ; other_index 40 ; local_id 8 <<
>> my_index 1073209710 ; other_index 41 ; local_id 9 <<
>> my_index 1206451488 ; other_index 42 ; local_id 10 <<
>> my_index 1072822847 ; other_index 43 ; local_id 11 <<
>> my_index -1561806289 ; other_index 44 ; local_id 12 <<
>> my_index 1072836235 ; other_index 45 ; local_id 13 <<
>> my_index 1797893287 ; other_index 46 ; local_id 14 <<
>> my_index 1072863946 ; other_index 47 ; local_id 15 <<
>> my_index 1499829849 ; other_index 48 ; local_id 16 <<
>> my_index 1073309078 ; other_index 49 ; local_id 17 <<
>> my_index 1215556782 ; other_index 50 ; local_id 18 <<
>> my_index 1073623117 ; other_index 51 ; local_id 19 <<
>> my_index -1741202958 ; other_index 52 ; local_id 20 <<
>> my_index 1073061666 ; other_index 53 ; local_id 21 <<
>> my_index 1908874354 ; other_index 54 ; local_id 22 <<
>> my_index 1072809756 ; other_index 55 ; local_id 23 <<
>> my_index 1499829849 ; other_index 56 ; local_id 24 <<
>> my_index 1073309078 ; other_index 57 ; local_id 25 <<
>> my_index 1965493508 ; other_index 58 ; local_id 26 <<
>> my_index 1073421919 ; other_index 59 ; local_id 27 <<
>> my_index -1908874354 ; other_index 60 ; local_id 28 <<
>> my_index 1073101027 ; other_index 61 ; local_id 29 <<
>> my_index -1561806289 ; other_index 62 ; local_id 30 <<
>> my_index 31 ; other_index 63 ; local_id 31 <<
>> my_index 1600085855 ; other_index 1499829849 ; local_id 0 <<
>> my_index 1072652127 ; other_index 1073309078 ; local_id 1 <<
>> my_index 942797699 ; other_index 1215556782 ; local_id 2 <<
>> my_index 1072923423 ; other_index 1073623117 ; local_id 3 <<
>> my_index -987348804 ; other_index -1741202958 ; local_id 4 <<
>> my_index 1072849931 ; other_index 1073061666 ; local_id 5 <<
>> my_index -833351863 ; other_index 1908874354 ; local_id 6 <<
>> my_index 1073209710 ; other_index 1072809756 ; local_id 7 <<
>> my_index -833351863 ; other_index 1499829849 ; local_id 8 <<
>> my_index 1073209710 ; other_index 1073309078 ; local_id 9 <<
>> my_index 1206451488 ; other_index 1965493508 ; local_id 10 <<
>> my_index 1072822847 ; other_index 1073421919 ; local_id 11 <<
>> my_index -1561806289 ; other_index -1908874354 ; local_id 12 <<
>> my_index 1072836235 ; other_index 1073101027 ; local_id 13 <<
>> my_index 1797893287 ; other_index -1561806289 ; local_id 14 <<
>> my_index 1072863946 ; other_index 31 ; local_id 15 <<
>> my_index 1600085855 ; other_index -833351863 ; local_id 0 <<
>> my_index 1072652127 ; other_index 1073309078 ; local_id 1 <<
>> my_index 942797699 ; other_index 1965493508 ; local_id 2 <<
>> my_index 1073623117 ; other_index 1072822847 ; local_id 3 <<
>> my_index -1741202958 ; other_index -1908874354 ; local_id 4 <<
>> my_index 1072849931 ; other_index 1073101027 ; local_id 5 <<
>> my_index -833351863 ; other_index 1797893287 ; local_id 6 <<
>> my_index 1073209710 ; other_index 1072863946 ; local_id 7 <<
>> my_index -833351863 ; other_index -1908874354 ; local_id 0 <<
>> my_index 1073309078 ; other_index 1072849931 ; local_id 1 <<
>> my_index 942797699 ; other_index -833351863 ; local_id 2 <<
>> my_index 1073623117 ; other_index 1072863946 ; local_id 3 <<
>> my_index -833351863 ; other_index -833351863 ; local_id 0 <<
>> my_index 1073309078 ; other_index 1072863946 ; local_id 1 <<
>> my_index -833351863 ; other_index 1072863946 ; local_id 0 <<

怎么可能?

代码:

/*
 * Work-group max-reduction over fitness values that also tracks the index
 * of the winning element, then flips the corresponding entry of inputAgent.
 *
 * Parameters:
 *   inputAgent   - global buffer; the entry at the winning index is
 *                  replaced by (value - 1) * -1, i.e. 0 <-> 1.
 *   output       - global buffer; output[0] receives the maximum fitness.
 *   bestFitness  - global buffer of fitness values; only the first `size`
 *                  entries are read.
 *   localFitness - local scratch, indexed by local id; must hold at least
 *                  get_local_size(0) doubles.
 *   indexes      - local scratch, indexed by local id; must hold at least
 *                  get_local_size(0) ints.
 *   size         - number of valid entries in bestFitness.
 *
 * NOTE(review): the halving loop only visits every element when
 * get_local_size(0) is a power of two - confirm the host always launches
 * with such a size.
 * NOTE(review): both local buffers are sized by the host (clSetKernelArg
 * with a NULL value pointer); an undersized `indexes` allocation would let
 * neighbouring local data overlap it - verify the sizes passed by the host
 * binding.
 * NOTE(review): the kernel indexes bestFitness by local id only, so it
 * presumably expects to be launched with a single work-group - confirm.
 */
__kernel void reduce( __global char* inputAgent,
                      __global double* output,
                      __global double* bestFitness,
                      __local double* localFitness,
                      __local int* indexes,
                      const unsigned int size)
{
  int local_id = get_local_id(0);

  // Populate local memory. Work-items at or beyond `size` have no input
  // element and are padded with 0. The bound is strict: valid indices are
  // 0 .. size-1, so `<=` would read one element past the end of bestFitness.
  if (local_id < size) {
    localFitness[local_id] = bestFitness[local_id];
  } else {
    localFitness[local_id] = 0;
  }

  // Populate table with consecutive numbers so that each slot initially
  // refers to its own element.
  indexes[local_id] = local_id;

  // Make the initial local-memory contents visible to the whole work-group
  // before any work-item reads a neighbour's slot.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Tree reduction: on each pass the lower half of the active range
  // absorbs the upper half, keeping the larger fitness and its index.
  for(int offset = get_local_size(0) / 2;
      offset > 0;
      offset >>= 1) {
    if (local_id < offset) {
      // find greater fitness
      double mine = localFitness[local_id];
      double other = localFitness[local_id + offset];
      localFitness[local_id] = (mine > other) ? mine : other;

      // store index of this greater fitness (ties keep the other index)
      int my_index  = indexes[local_id];
      int other_index = indexes[local_id + offset];
      indexes[local_id] = (mine > other) ? my_index : other_index;
      printf(">> my_index %d ; other_index %d ; local_id %d <<",
             my_index, other_index, local_id);
    }
    // Reached by every work-item (it sits outside the `if`), as barrier()
    // requires; separates this pass's writes from the next pass's reads.
    barrier(CLK_LOCAL_MEM_FENCE);
  }


  if (local_id == 0) { //last, not mutated
    // return the maximum fitness found
    output[0] = localFitness[0];

    // reuse found index
    int bit_to_change = indexes[0];

    // A padded slot can win the reduction (e.g. when every real fitness is
    // <= 0); its index is >= size and must not be used to touch inputAgent.
    if (bit_to_change < size) {
      inputAgent[bit_to_change] = (inputAgent[bit_to_change] - 1) * -1;
    }
  }
}

欢迎任何建议。


修改

我把问题定位到了 barrier 附近。我又添加了两条 printf:一条紧挨在第一个 barrier 之前,另一条紧挨在它之后:

  [...]
  indexes[localID] = localID;

  printf("<<< Fit %f for %d\n", localFitness[localID], indexes[localID] );

  barrier(CLK_LOCAL_MEM_FENCE);

  printf("*** Fit %f for %d\n", localFitness[localID], indexes[localID] );

  [...]

输出我得到:

<<< Fit 0.990099 for 0
<<< Fit 1.449275 for 1
<<< Fit 1.538462 for 2
<<< Fit 1.030928 for 3
[...] 
                                                                                                                  ******* Fit 0.990099 for -1636178018
******* Fit 1.449275 for 1072593383
******* Fit 1.538462 for -1184818564
******* Fit 1.030928 for 1072042407
******* Fit 2.222222 for -1688619621
******* Fit 2.222222 for 1072388533
[...]

这似乎表明 barrier 没有起作用;我不知道该怎么办。有什么想法吗?

1 个答案:

答案 0 :(得分:0)

我能够(在我的同事的帮助下)找到问题所在。从Erlang到OpenCL的绑定存在一个小问题。稍后我会详细介绍我是如何找到它的(只是为了提供我的调试路径),但是现在我想把这个问题标记为已解决。