CUDA纹理对象 - 非归一化坐标模式下的插值结果不正确

时间:2013-06-26 00:55:20

标签: cuda textures gpu gpgpu interpolation

对绑定到CUDA数组的CUDA纹理对象使用非归一化(unnormalized)坐标进行线性插值时,似乎会返回错误的结果:插值结果看起来只有预期值的0.5倍。而使用归一化坐标的线性插值似乎工作正常。

这段代码有什么问题吗?还是说,在使用非归一化坐标进行纹理插值时,本来就需要将结果乘以2?

代码:

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

// Writes the first `length` elements of `a` to std::cout, one element per
// line, formatted as "a[<index>]: <value>". Each line is ended with std::endl.
template <typename T>
void print_array(const T *a, const size_t length) {
  size_t idx = 0;
  while (idx != length) {
    std::cout << "a[" << idx << "]: " << a[idx] << std::endl;
    ++idx;
  }
}

// Samples a 1D texture object at evenly spaced coordinates and prints each
// (x, y) pair with device-side printf.
//
//   tex   - texture object fetched through tex1D<float>
//   start - coordinate of the first sample
//   stop  - end of the sweep; the coordinate advances by (stop - start) / count
//           after every sample
//   count - number of steps; values below 1 are treated as 1
//
// count + 1 samples are printed in total: one per step, plus a closing sample
// taken after the last step. Every thread that executes this kernel performs
// the full sweep and prints all of its samples.
__global__
void cuda_texture_interpolate(cudaTextureObject_t tex,
                              float start,
                              float stop,
                              int count) {
  const int steps = (count < 1) ? 1 : count;
  const float step = (stop - start) / static_cast<float>(steps);

  float pos = start;
  for (int remaining = steps; remaining > 0; --remaining) {
    const float value = tex1D<float>(tex, pos);
    printf("x: %4g ; y: %4g\n", pos, value);
    pos += step;
  }

  // closing sample, taken at the coordinate reached after the final step
  const float last = tex1D<float>(tex, pos);
  printf("x: %4g ; y: %4g\n", pos, last);
}

// Aborts the program with a readable message when a CUDA runtime call fails.
// `what` names the call being checked so the failure can be located.
static void check_cuda(cudaError_t status, const char *what) {
  if (status != cudaSuccess) {
    fprintf(stderr, "CUDA error in %s: %s\n", what,
            cudaGetErrorString(status));
    std::exit(EXIT_FAILURE);
  }
}

// Demo driver: uploads a small float array into a CUDA array, wraps it in a
// texture object configured for linear filtering with unnormalized
// coordinates, and launches a single-thread kernel that prints samples of the
// texture over [0, 5]. Returns 0 on success; any CUDA failure terminates the
// process with EXIT_FAILURE.
int main(void) {
  // set up host array; the element count is derived from the initializer so
  // the two cannot drift apart
  float a_host[] = {3, 2, 1, 2, 3};
  const int n = static_cast<int>(sizeof(a_host) / sizeof(a_host[0]));
  printf("printing array on host.\n");
  print_array(a_host, n);

  // allocate a 1D CUDA array holding one 32-bit float per texel
  cudaChannelFormatDesc channelDesc =
      cudaCreateChannelDesc(32, 0, 0, 0,
                            cudaChannelFormatKindFloat);
  cudaArray* cuArray;
  check_cuda(cudaMallocArray(&cuArray, &channelDesc, n), "cudaMallocArray");

  // Copy a_host into the CUDA array. cudaMemcpy2DToArray with a single row
  // replaces the deprecated cudaMemcpyToArray.
  check_cuda(cudaMemcpy2DToArray(cuArray, 0, 0, a_host,
                                 n * sizeof(float),  // source pitch
                                 n * sizeof(float),  // row width in bytes
                                 1,                  // one row
                                 cudaMemcpyHostToDevice),
             "cudaMemcpy2DToArray");

  // describe the resource backing the texture
  cudaResourceDesc resDesc;
  memset(&resDesc, 0, sizeof(resDesc));
  resDesc.resType = cudaResourceTypeArray;
  resDesc.res.array.array = cuArray;

  // Sampling state: clamp out-of-range coordinates, linear filtering, raw
  // element reads, unnormalized coordinates (set normalizedCoords to 1 to
  // address the texture over [0, 1) instead). Per the CUDA Programming Guide
  // (Texture Fetching), with unnormalized coordinates and linear filtering
  // texel i is centered at i + 0.5.
  cudaTextureDesc texDesc;
  memset(&texDesc, 0, sizeof(texDesc));
  texDesc.addressMode[0]   = cudaAddressModeClamp;
  texDesc.filterMode       = cudaFilterModeLinear;
  texDesc.readMode         = cudaReadModeElementType;
  texDesc.normalizedCoords = 0;

  // resource view matching the array's own format and width
  cudaResourceViewDesc resViewDesc;
  memset(&resViewDesc, 0, sizeof(resViewDesc));
  resViewDesc.format = cudaResViewFormatFloat1;
  resViewDesc.width = n;

  // create texture object
  cudaTextureObject_t tex;
  check_cuda(cudaCreateTextureObject(&tex, &resDesc, &texDesc, &resViewDesc),
             "cudaCreateTextureObject");

  // call interpolation kernel (one thread; it prints every sample itself)
  printf("interpolate (f(x) -> y).\n");
  cuda_texture_interpolate<<<1,1>>>(tex, 0.0f, 5.0f, 10);
  // launch-configuration errors are only visible through cudaGetLastError
  check_cuda(cudaGetLastError(), "cuda_texture_interpolate launch");
  // Wait for the kernel before tearing down the texture it reads; this also
  // flushes the device-side printf buffer and surfaces execution errors.
  check_cuda(cudaDeviceSynchronize(), "cuda_texture_interpolate execution");

  // clean up
  check_cuda(cudaDestroyTextureObject(tex), "cudaDestroyTextureObject");
  check_cuda(cudaFreeArray(cuArray), "cudaFreeArray");

  printf("end of texture_object_interpolation.\n");
  return 0;
}

结果:

$ ./texture_object_interpolation
printing array on host.
a[0]: 3
a[1]: 2
a[2]: 1
a[3]: 2
a[4]: 3
interpolate (f(x) -> y).
x:    0 ; y:  1.5
x:  0.5 ; y:  1.5
x:    1 ; y: 1.25
x:  1.5 ; y:    1
x:    2 ; y: 0.75
x:  2.5 ; y:  0.5
x:    3 ; y: 0.75
x:  3.5 ; y:    1
x:    4 ; y: 1.25
x:  4.5 ; y:  1.5
x:    5 ; y:  1.5
end of texture_object_interpolation.

请参阅此gist,其中包含上述代码、Makefile,以及使用归一化坐标进行插值的代码。

1 个答案:

答案 0 :(得分:1)

这看来是由CUDA 5.0编译器中的一个错误引起的,该错误已在CUDA 5.5版本中修复。

[此答案根据评论整理而成,以便将该问题从CUDA标签的未回答问题列表中移除]