GPU和CPU上的相同代码但结果不同

时间:2015-07-13 07:47:02

标签: floating-point opencl gpu gpgpu gpu-programming

我在gpu和cpu上实现了BVH Traversal方法。代码相同,可在此处找到:

GPU代码:

bool insideCell(const __global UnstructDataset* dataset, const int cellIndex, const float4 point)
{

  if(!inside(dataset->cell_box[cellIndex], point)) return false;

  if (dataset->cell_point[dataset->cell_points_beg_index[cellIndex]] == 4)
  {
    float barycoords[4];
    float4 cellpoints[4] = {
      dataset->point[dataset->cell_point[dataset->cell_points_beg_index[cellIndex] + 1]],
      dataset->point[dataset->cell_point[dataset->cell_points_beg_index[cellIndex] + 2]],
      dataset->point[dataset->cell_point[dataset->cell_points_beg_index[cellIndex] + 3]],
      dataset->point[dataset->cell_point[dataset->cell_points_beg_index[cellIndex] + 4]]
    };

    const float4 v1v0 = cellpoints[1] - cellpoints[0];
    const float4 v2v0 = cellpoints[2] - cellpoints[0];
    const float4 v3v2 = cellpoints[3] - cellpoints[2];
    const float4 pv0 = point - cellpoints[0];

    float tetvolumeinv = 1.0f / dot(v2v0, cross(v1v0, v3v2));

    // calculate the barycentric coordinates
    barycoords[0] = dot(cellpoints[2] - point, cross(cellpoints[1] - point, v3v2)) * tetvolumeinv;
    barycoords[1] = dot(v2v0, cross(pv0, v3v2)) * tetvolumeinv;
    barycoords[2] = dot(pv0, cross(v1v0, cellpoints[3] - point)) * tetvolumeinv;
    barycoords[3] = 1.0f - barycoords[0] - barycoords[1] - barycoords[2];

    // compute barycentric interpolation
    return true;

  }
}

// Locates the grid element which the point is residing in it.
int locateBVHElemIdx(
  __global UnstructDataset* dataset,
  __global BVHTree* bvh,
  float4 point
  ) {

      uint cell_index = 0;
      int todoOffset = 0, nodeNum = 0;
      int todo[64];

      while(true){

      // if the current node is an interior node
      if(nodeNum >= 0){
        const __global LinearBVHInteriorNode* node = &bvh->interior_nodes[nodeNum];
        const bool insideLeftChild   = inside(node->left_bound, point/*, node->splitAxis*/);
        const bool insideRightChilld = inside(node->right_bound, point/*, node->splitAxis*/);

        if(insideLeftChild && insideRightChilld){
          todo[todoOffset++] = node->children[1];
          nodeNum = node->children[0];

        } else if(insideLeftChild && !insideRightChilld){
          nodeNum = node->children[0];
        } else if(insideRightChilld && !insideLeftChild){
          nodeNum = node->children[1];
        } else {
          if(todoOffset == 0) break;
          nodeNum = todo[--todoOffset];
        }

      } else {

        const __global LinearBVHLeafNode* node = &bvh->leaf_nodes[-nodeNum - 1];

        // return node->lowIdx;

        // inside leaf node 
        for(int i = node->lowIdx; i < node->highIdx; i++){
          if(insideCell(dataset, i, point))
            return i;
        }

        if(todoOffset == 0) break;
        nodeNum = todo[--todoOffset];

      }     

    }

  return 0;
}

CPU代码:

bool UnstructDataset::insideCell(PrimitiveIndex cellIndex, const glm::vec3& point) const {

  if (!m_cellBoxes[cellIndex].contains((float*)&point))
    return false;

    if (m_cellPoints[m_cellPointsBegIndices[cellIndex]] == 4)
  {
    float barycoords[4];
    glm::vec3 cellpoints[4] = {
        m_points[m_cellPoints[m_cellPointsBegIndices[cellIndex] + 1]],
        m_points[m_cellPoints[m_cellPointsBegIndices[cellIndex] + 2]],
        m_points[m_cellPoints[m_cellPointsBegIndices[cellIndex] + 3]],
        m_points[m_cellPoints[m_cellPointsBegIndices[cellIndex] + 4]]
    };

    const glm::vec3 v1v0 = cellpoints[1] - cellpoints[0];
    const glm::vec3 v2v0 = cellpoints[2] - cellpoints[0];
    const glm::vec3 v3v2 = cellpoints[3] - cellpoints[2];
    const glm::vec3 pv0 = point - cellpoints[0];

    float tetvolumeinv = 1.0f / glm::dot(v2v0, glm::cross(v1v0, v3v2));

    // calculate the barycentric coordinates
    barycoords[0] = glm::dot(cellpoints[2] - point, glm::cross(cellpoints[1] - point, v3v2)) * tetvolumeinv;
    barycoords[1] = glm::dot(v2v0, glm::cross(pv0, v3v2)) * tetvolumeinv;
    barycoords[2] = glm::dot(pv0, glm::cross(v1v0, cellpoints[3] - point)) * tetvolumeinv;
    barycoords[3] = 1.0f - barycoords[0] - barycoords[1] - barycoords[2];

    // if the point is in the tetrahedron
    if (barycoords[0] < 0.0f || barycoords[1] < 0.0f || barycoords[2] < 0.0f || barycoords[3] < 0.0f)
        return false;

        return true;

  }
}

bool BVHTree::getBoundingPrimitiveIndex(const UnstructDataset* const datasetPtr, const float point[3], PrimitiveIndex& cell_index) {

    uint32_t idx = 0;
    int todoOffset = 0, nodeNum = 0;
    int todo[64];

    while(true){

    // if the current node is an interior node
    if(nodeNum >= 0){

      const LinearBVHInteriorNode* node = &m_interior_nodes[nodeNum];

      const bool insideLeftChild   = node->leftBound.contains(point/*, node->splitAxis*/);
      const bool insideRightChilld = node->rightBound.contains(point/*, node->splitAxis*/);

      if(insideLeftChild && insideRightChilld){
        todo[todoOffset++] = node->children[1];
        nodeNum = node->children[0];

      } else if(insideLeftChild && !insideRightChilld){
        nodeNum = node->children[0];
      } else if(insideRightChilld && !insideLeftChild){
        nodeNum = node->children[1];
      } else {
        if(todoOffset == 0) break;
        nodeNum = todo[--todoOffset];
      }

    } else {

      const LinearBVHLeafNode* node = &m_leaf_nodes[-nodeNum - 1];

      // inside leaf node 
      for(int i = node->lowIdx; i < node->highIdx; i++){
                if( datasetPtr->insideCell( i, glm::vec3( point[0], point[1], point[2] ) ) ){
          cell_index = i;

          return true;
                }
      }

      if(todoOffset == 0) break;
            nodeNum = todo[--todoOffset];

    }
  }

    return false;
}

问题是当我使用代码时,我会得到非常接近BVH的节点。对我来说,似乎gpu和cpu上的浮点运算不相同,这导致算法导致不同的节点,而两个代码都正常工作。可以吗?

1 个答案:

答案 0 :(得分:0)

正如评论中提到的@jprice,代码中存在错误。因此,我没有检查重心坐标在0.0和1.0之间,看看该点是否实际上在单元格中。修复错误后,两个结果都是相同的。