对绑定到 CUDA 数组的 CUDA 纹理对象进行非归一化线性插值时,返回的结果似乎不正确:插值结果似乎只有预期值的 0.5 倍。
而归一化线性插值似乎工作正常。
这段代码有什么问题吗?在进行非归一化纹理插值时,是否本来就需要把结果乘以 2?
代码:
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
// Writes every element of `a` to std::cout, one per line, in the form
// "a[<index>]: <value>". Each line is terminated with std::endl, so the
// stream is flushed after every element.
//   a      - pointer to the first element to print
//   length - number of elements to print; nothing is printed when it is 0
template <typename T>
void print_array(const T *a, const size_t length) {
    size_t index = 0;
    while (index < length) {
        std::cout << "a[" << index << "]: " << a[index] << std::endl;
        ++index;
    }
}
// Samples a 1D texture object at evenly spaced coordinates and prints each
// (x, y) pair with device-side printf.
//   tex   - texture object to fetch from (via tex1D<float>)
//   start - first sample coordinate
//   stop  - last sample coordinate
//   count - number of intervals between start and stop; values below 1 are
//           treated as 1, so count + 1 samples are printed in total
// Every thread that runs this kernel prints the full table; the caller in
// this file launches it with a single thread.
__global__
void cuda_texture_interpolate(cudaTextureObject_t tex,
                              float start,
                              float stop,
                              int count) {
    const int steps = (count < 1) ? 1 : count;
    const float step = (stop - start) / static_cast<float>(steps);
    float coord = start;
    // Inclusive upper bound: the final iteration samples the end point.
    for (int k = 0; k <= steps; ++k) {
        const float value = tex1D<float>(tex, coord);
        printf("x: %4g ; y: %4g\n", coord, value);
        coord += step;
    }
}
// Aborts the program with a diagnostic when a CUDA runtime call fails.
//   status - return code of the CUDA runtime call
//   what   - short description of the call, included in the message
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Copies a small host array into a 1D CUDA array, wraps it in a texture
// object configured for linear filtering with non-normalized coordinates,
// and launches a single-thread kernel that prints interpolated samples.
// Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
int main(void) {
    // set up host array
    const int n = 5;
    float a_host[n] = {3, 2, 1, 2, 3};
    printf("printing array on host.\n");
    print_array(a_host, n);

    // allocate a 1D CUDA array with one 32-bit float channel
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cuArray = NULL;
    check_cuda(cudaMallocArray(&cuArray, &channelDesc, n), "cudaMallocArray");

    // Copy a_host into the CUDA array. cudaMemcpyToArray is deprecated, so
    // the copy is expressed as a 2D copy of a single row.
    const size_t row_bytes = n * sizeof(float);
    check_cuda(cudaMemcpy2DToArray(cuArray, 0, 0, a_host, row_bytes,
                                   row_bytes, 1, cudaMemcpyHostToDevice),
               "cudaMemcpy2DToArray");

    // describe the resource backing the texture
    cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // describe how the texture is sampled
    cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeClamp;
    texDesc.filterMode = cudaFilterModeLinear;
    texDesc.readMode = cudaReadModeElementType;
    //texDesc.normalizedCoords = 1;
    texDesc.normalizedCoords = 0;

    cudaResourceViewDesc resViewDesc;
    memset(&resViewDesc, 0, sizeof(resViewDesc));
    resViewDesc.format = cudaResViewFormatFloat1;
    resViewDesc.width = n;

    // create texture object
    cudaTextureObject_t tex = 0;
    check_cuda(cudaCreateTextureObject(&tex, &resDesc, &texDesc, &resViewDesc),
               "cudaCreateTextureObject");

    // call interpolation kernel
    printf("interpolate (f(x) -> y).\n");
    //cuda_texture_interpolate<<<1,1>>>(tex,0.0f,1.0f,10);
    cuda_texture_interpolate<<<1,1>>>(tex, 0.0f, 5.0f, 10);
    // A launch returns no status itself: fetch launch errors explicitly,
    // then synchronize so execution errors surface and the device-side
    // printf output is flushed before the texture is torn down.
    check_cuda(cudaGetLastError(), "kernel launch");
    check_cuda(cudaDeviceSynchronize(), "kernel execution");

    // clean up
    check_cuda(cudaDestroyTextureObject(tex), "cudaDestroyTextureObject");
    check_cuda(cudaFreeArray(cuArray), "cudaFreeArray");
    printf("end of texture_object_interpolation.\n");
    return 0;
}
结果:
$ ./texture_object_interpolation
printing array on host.
a[0]: 3
a[1]: 2
a[2]: 1
a[3]: 2
a[4]: 3
interpolate (f(x) -> y).
x: 0 ; y: 1.5
x: 0.5 ; y: 1.5
x: 1 ; y: 1.25
x: 1.5 ; y: 1
x: 2 ; y: 0.75
x: 2.5 ; y: 0.5
x: 3 ; y: 0.75
x: 3.5 ; y: 1
x: 4 ; y: 1.25
x: 4.5 ; y: 1.5
x: 5 ; y: 1.5
end of texture_object_interpolation.
上述代码、Makefile 以及归一化插值版本的代码,请参阅此 gist。
答案 0 :(得分:1)
这似乎是由 CUDA 5.0 编译器中的一个错误引起的,该错误已在 CUDA 5.5 版本中修复。
[此答案整理自评论,目的是将该问题从 CUDA 标签的未回答问题列表中移除]