我正在尝试使用CUDA API在计算能力1.3 GPU中执行内核。绑定一维数组按预期工作,但以下代码产生错误:
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
inline static void __checkCudaErrors( cudaError err, const char *file, const int line ) {
if( cudaSuccess != err) {
fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString( err ) );
exit(-1);
}
}
texture<int, cudaTextureType2D> tex_transition;
int main ( void ) {
int m = 8, p_size = 100, alphabet = 20;
size_t pitch;
int *transition = ( int * ) malloc ( ( m * p_size + 1 ) * alphabet * sizeof ( int ) );
memset ( transition, -1, ( m * p_size + 1 ) * alphabet * sizeof ( int ) );
int *d_transition;
checkCudaErrors ( cudaMallocPitch ( &d_transition, &pitch, alphabet * sizeof ( int ), ( m * p_size + 1 ) ) );
checkCudaErrors ( cudaMemcpy2D ( d_transition, pitch, transition, alphabet * sizeof ( int ), alphabet * sizeof ( int ), ( m * p_size + 1 ), cudaMemcpyHostToDevice ) );
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
checkCudaErrors ( cudaBindTexture2D ( 0, tex_transition, d_transition, desc, alphabet * sizeof ( int ), ( m * p_size + 1 ), pitch ) );
cudaFree ( d_transition );
return 0;
}
执行时我得到错误“test.cu(33):CUDA Runtime API错误11:无效参数。 “。通过将字母设置为10,错误消失。如果我没有记错,每个绑定到纹理的数组最大大小为65000 x 65000个单词(在本例中为整数),但过渡数组要小得多。
答案 0 :(得分:2)
你有一个cudaBindTexture2D调用中的参数错误。纹理的尺寸为 texel 单位,而不是字节,因此调用应为:
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaBindTexture2D ( 0,
tex_transition,
d_transition, desc,
alphabet, // in texels
( m * p_size + 1 ), // in texels
pitch );
字节宽度仅在分配调用中是必需的。纹理绑定使用pitch参数来计算2D分配的内存布局。