Question

我正在编写一个 OpenCL 内核，它会加载一些点，判断哪个点最高，然后返回该点。这部分运行良好，但我想在求最高点之前增加一步计算：将点与一对直线进行比较。我已经用展开（longhand）的写法实现了这一步，并且可以正常工作，如下：

    // Kernel fragment (longhand version): walks the index range starting at
    // group_id * group_stride + local_id and advancing by local_stride while
    // i < n. Each iteration fetches two points, optionally runs the two-line
    // check on point a, and folds the higher-z point into shared[local_id].
    // NOTE(review): the LOAD_*/STORE_* macros and group_*/local_* variables are
    // defined outside this snippet; their exact semantics cannot be confirmed here.
    size_t i = group_id * group_stride + local_id;
    while (i < n){
        //load up a pair of points using the index to locate them within a massive dataSet
        int ia = LOAD_GLOBAL_I1(input, i);
        float4 a = LOAD_GLOBAL_F4(dataSet, ia);

        // second point of the pair comes from the index stored group_size entries further on
        int ib = LOAD_GLOBAL_I1(input, i + group_size);
        float4 b = LOAD_GLOBAL_F4(dataSet, ib);

        //pre-assess the points relative to lines
        if(pass == 0){
            float px = a.x;
            float py = a.y;
            // checkAnswer ends up as 1 (fails line 1), 2 (fails line 2) or 0 (passes both).
            // NOTE(review): it is declared inside this if-block, so it is not visible
            // to the code after the block.
            int checkAnswer;
        //want to write this section as a function
            // line 1 is built from tri_input[0] and tri_input[2]
            float x1 = tri_input[0].x; float y1 = tri_input[0].y;
            float x2 = tri_input[2].x; float y2 = tri_input[2].y;
            // sign of the expression is compared with the reference value in tri_input[3].x
            float check = sign((x1-x2) * (py-y1) - (y2-y1) * (px-x1));
            if(check != tri_input[3].x){        //point is outside line 1
                checkAnswer = 1;
            }
            else{
                // line 2 is built from tri_input[2] and tri_input[1]; reference value is tri_input[3].y
                x1 = tri_input[2].x;  y1 = tri_input[2].y;
                x2 = tri_input[1].x;  y2 = tri_input[1].y;
                check = sign((x1-x2)*(py-y1) - (y2-y1)*(px-x1));
                if(check != tri_input[3].y){    //point is outside line 2
                    checkAnswer = 2;
                }
                else{
                    checkAnswer = 0;            //point is within both lines
        }}}

        //later use the checkAnswer result to change the following
        //find the highest of the pair
        // b is chosen when the z values are equal
        float4 result;
        if(a.z>b.z) result = a;
        else result = b;

        //load up the previous highest result locally
        float4 s = LOAD_LOCAL_F4(shared, local_id);

        //if the previous highest beat this, stick, else twist
        // the new result replaces s unless s.z is strictly greater
        if(s.z>result.z){ STORE_LOCAL_F4(shared, local_id, s);}
        else{ STORE_LOCAL_F4(shared, local_id, result);}
        i += local_stride;
    }

我想做的是把直线检查写成一个函数，并对两个点各调用一次，即代码变为：

   // Kernel fragment (function-call version): same loop as the longhand
   // variant, but the two-line check is delegated to pointCheck() and run
   // once for point a and once for point b.
   // NOTE(review): the LOAD_*/STORE_* macros and group_*/local_* variables are
   // defined outside this snippet; their exact semantics cannot be confirmed here.
   size_t i = group_id * group_stride + local_id;
   while (i < n){
        //load up a pair of points using the index to locate them within a massive dataSet
        int ia = LOAD_GLOBAL_I1(input, i);
        float4 a = LOAD_GLOBAL_F4(dataSet, ia);

        // second point of the pair comes from the index stored group_size entries further on
        int ib = LOAD_GLOBAL_I1(input, i + group_size);
        float4 b = LOAD_GLOBAL_F4(dataSet, ib);

        //pre-assess the points relative to lines
        if(pass == 0){
            float px = a.x;
            float py = a.y;
            // NOTE(review): px and py are float values here and are passed as-is;
            // verify this against the parameter types pointCheck declares.
            int checkA = pointCheck( px,  py, tri_input);
            // px/py are reused for the second point
            px = b.x;
            py = b.y;
            int checkB = pointCheck( px,  py, tri_input);
            // checkA and checkB are scoped to this if-block and are not read in this snippet
        }

        //later use the checkAnswer result to change the following
        //find the highest of the pair
        // b is chosen when the z values are equal
        float4 result;
        if(a.z>b.z) result = a;
        else result = b;

        //load up the previous highest result locally
        float4 s = LOAD_LOCAL_F4(shared, local_id);

        //if the previous highest beat this, stick, else twist
        // the new result replaces s unless s.z is strictly greater
        if(s.z>result.z){ STORE_LOCAL_F4(shared, local_id, s);}
        else{ STORE_LOCAL_F4(shared, local_id, result);}
        i += local_stride;
    }

在这个例子中，函数是：

/*
 * Tests a point against two lines described by testLines.
 *
 * px, py     - declared as pointers to the point's x and y coordinates.
 * testLines  - at least four float2 entries: [0] and [2] define line 1,
 *              [2] and [1] define line 2, and [3].x / [3].y hold the sign
 *              value each line test is compared against.
 *
 * Returns 1 if the line-1 test differs from testLines[3].x, 2 if the
 * line-2 test differs from testLines[3].y, and 0 if both tests match.
 *
 * NOTE(review): px and py are pointers, yet the expressions below use them
 * directly in arithmetic with float values instead of dereferencing them
 * (*px, *py). The body therefore does not operate on the point's
 * coordinates as written.
 */
int pointCheck( float *px,  float *py, float2 *testLines){

// line 1 runs between testLines[0] and testLines[2]
float x1 = testLines[0].x; float y1 = testLines[0].y;
float x2 = testLines[2].x; float y2 = testLines[2].y;
// sign of the expression is compared with the reference value for line 1
float check = sign((x1-x2) * (py-y1) - (y2-y1) * (px-x1));
if(check != testLines[3].x){ //point is outside line 1
    return 1;
}
else{
    // line 2 runs between testLines[2] and testLines[1]
    x1 = testLines[2].x;  y1 = testLines[2].y;
    x2 = testLines[1].x;  y2 = testLines[1].y;
    check = sign((x1-x2)*(py-y1) - (y2-y1)*(px-x1));
    if(check != testLines[3].y){ //point is outside line 2
        return 2;
    }
    else{
        return 0; //point is within both lines
}}}

虽然展开（longhand）版本运行正常并返回正确的“最高点”结果，但函数版本返回了错误的结果（没有检测到我藏在数据集中的最高点）。即使函数的返回值尚未被后续代码使用，结果也已经是错误的。

我做错了什么？

取值

[更新]：下面这个修改后的函数在把相关行注释掉时可以运行，一旦启用被注释掉的行就会挂起：

/*
 * Debugging variant of pointCheck: takes the point as a float4 pointer and
 * reads its x/y members, and reads the four testLines entries through
 * pointers.
 *
 * P          - point to test; only P->x and P->y are read.
 * testLines  - at least four float2 entries; [0], [1], [2] supply the line
 *              end points and [3].x / [3].y the reference sign values.
 *
 * NOTE(review): the two line tests are commented out, so as shown the
 * function only copies values into locals and reaches the closing brace
 * without executing a return statement, although it is declared to return
 * int.
 */
int pointCheck(float4 *P, float2 *testLines){

// pointers to the individual testLines entries
float2 *l0 = &testLines[0];
float2 *l1 = &testLines[1];
float2 *l2 = &testLines[2];
float2 *l3 = &testLines[3];

// end points of line 1
float x1 = l0->x; float y1 = l0->y;
float x2 = l2->x; float y2 = l2->y;

// point coordinates and the reference sign values for line 1 (c1) and line 2 (c2)
float pX = P->x; float pY = P->y;
float c1 = l3->x; float c2 = l3->y;

//float check = sign((x1-x2) * (pY-y1) - (y2-y1) * (pX-x1)); //seems to be a problem with sign
//    if(check != c1){            //point is outside line 1
//        return 1;
//    }
//    else{
//        x1 = l2->x; y1 = l2->y;
//        x2 = l1->x; y2 = l1->y;
//        check = sign((x1-x2) * (pY-y1) - (y2-y1) * (pX-x1));
//        if(check != c2){        //point is outside line 2
//            return 2;
//        }
//        else{
//            return 0;           //point is within both lines
//    }}
}

Answer 1

一个直接的问题是如何将参数传递给被调用的函数：

int checkA = pointCheck( px,  py, tri_input);

而函数本身需要的是 px 和 py 的指针。您应该这样调用该函数：

int checkA = pointCheck(&px, &py, tri_input);

令人惊讶的是，OpenCL不会为此内核提供构建错误。

根据我的经验，一些 OpenCL 运行时不喜欢在单个函数中出现多个 return 语句。请尝试把返回值保存到一个局部变量中，并在函数末尾只使用一条 return 语句。这是因为 OpenCL 并不支持真正的函数调用，而是把所有函数直接内联到内核中。因此，最佳做法是将所有非 __kernel 函数标记为 inline，并按内联函数来对待它们（即不使用多个 return 语句，让编译器更容易内联该函数）。

OpenCL函数调用

1 个答案: