使用intel offload gfx进行计算无法正常工作

时间:2017-10-01 17:07:17

标签: c++ opencv intel gfx

我已经编写了一个 skeletonize(骨架化)函数。它在 CPU 上运行良好,但是当我尝试把它卸载(offload)到 Intel 显卡上执行时,它不返回任何数据,甚至不返回任何值。我究竟做错了什么?我正在使用 C++、OpenCV 和 Intel 的并行库。

以下是代码,其中关键的部分是 #pragma offload 指令:

// Prologue of a thinning (skeletonization) routine: builds a float image that
// is one pixel larger than the input on every side, zeroes its border, and
// fills its interior with a 0/1 mask obtained by thresholding at 20.0f.
//
// inputarray  - source image; the commented-out reference code reads it with
//               at<float>(), so it is presumably CV_32FC1 -- TODO confirm.
// outputarray - not touched by the visible part of the function.
//
// NOTE(review): this snippet is elided. `rows` and `cols` are declared in the
// part replaced by "//....", and the only closing brace of the function is
// inside the #else branch, so the __INTEL_OFFLOAD path as shown is unbalanced.
void thinning(cv::Mat & inputarray, cv::Mat & outputarray)
{
    //....
    // Padded working image: (rows + 2) x (cols + 2), single-channel float.
    cv::Mat p_enlarged_src = cv::Mat(rows + 2, cols + 2, CV_32FC1);

    //for intel offload
    // Raw pointer view of the padded image so the offloaded loops do not need
    // cv::Mat accessors.
    float* matData_penlarged_src = (float*)p_enlarged_src.data;
    // Row stride expressed in float elements (cv::Mat::step is in bytes).
    size_t elem_step = p_enlarged_src.step / sizeof(float);
    #ifdef __INTEL_OFFLOAD
        // Pass 1: zero the left-most and right-most column of every row.
        // NOTE(review): the array section below has length (rows+1)*(cols+1),
        // but the loops in this file index up to (rows+1)*elem_step + (cols+1),
        // i.e. at least (rows+2)*(cols+2) elements are touched. If the section
        // is [start:length] as in the Intel offload docs, the transferred
        // region is too small -- probably should be (rows+2)*elem_step; confirm.
        #pragma offload target(gfx) inout(matData_penlarged_src[0:(rows+1)*(cols+1)]) in(elem_step)
        _Cilk_for(int i = 0; i < (rows+2); i++)
        {
            //p_enlarged_src.at<float>(i, 0) = 0.0f;
            matData_penlarged_src[i * elem_step + 0] = 0.0f;
            //p_enlarged_src.at<float>( i, cols+1) = 0.0f;
            matData_penlarged_src[i * elem_step + (cols+1)] = 0.0f;
        }
        // Pass 2: zero the top and bottom row (same array-section size caveat
        // as pass 1; the bottom row starts at (rows+1)*elem_step).
        #pragma offload target(gfx) inout(matData_penlarged_src[0:(rows+1)*(cols+1)]) in(elem_step)
        _Cilk_for(int j = 0; j < (cols+2); j++)
        {
            //p_enlarged_src.at<float>(0, j) = 0.0f;
            matData_penlarged_src[0 * elem_step + j] = 0.0f;
            //p_enlarged_src.at<float>(rows+1, j) = 0.0f;
            matData_penlarged_src[(rows+1) * elem_step + j] = 0.0f;
        }
        // Pass 3: write the thresholded 0/1 mask into the interior, shifted by
        // (+1, +1) to skip the border.
        // NOTE(review): the reference line reads inputarray.at<float>(i, j),
        // but the active code reads the padded buffer itself, which was never
        // filled from inputarray in the visible code. It also reads element
        // (i, j) while other parallel iterations write (i+1, j+1) of the same
        // buffer, so the result looks order-dependent. Presumably a separate
        // pointer to inputarray's data (passed with an in(...) clause) was
        // intended -- confirm.
        #pragma offload target(gfx) inout(matData_penlarged_src[0:(rows+1)*(cols+1)]) in(elem_step)
        _Cilk_for(int i = 0; i < rows; i++)
        {
            _Cilk_for(int j = 0; j < cols; j++)
            {
                //if (inputarray.at<float>(i, j) >= 20.0f)
                if(matData_penlarged_src[i * elem_step + j] >= 20.0f)
                {
                    //p_enlarged_src.at<float>( i+1, j+1) = 1.0f;
                    matData_penlarged_src[(i+1) * elem_step + (j+1)] = 1.0f;
                }
                else
                    //p_enlarged_src.at<float>( i+1, j+1) = 0.0f;
                    matData_penlarged_src[(i+1) * elem_step + (j+1)] = 0.0f;
            }
        }

    #else
        //same code like above, but only on the CPU and it works!
        // NOTE(review): this brace closes the function only when
        // __INTEL_OFFLOAD is not defined.
        }
    #endif

0 个答案:

没有答案