我有一个代表2D数组的无符号字符的线性数组。我想将它放入CUDA 2D纹理并对其执行(浮点)线性插值,即,使纹理调用获取4个最近的无符号字符邻居,在内部将它们转换为float,在它们之间进行插值,并返回结果浮点值。
我在设置纹理并将其绑定到纹理参考时遇到一些困难。我已经阅读了CUDA参考手册&附录,但我没有运气。
下面是可运行的代码,用于设置和绑定1)浮点纹理和2)unsigned char纹理。浮点代码运行得很好。但是,如果将两个注释的unsigned char行取消注释到底部,则会抛出“invalid argument”错误。
#include <cstdio>
#include <cuda_runtime.h>
typedef unsigned char uchar;
// Define (global) texture references; must use "cudaReadModeNormalizedFloat"
// for ordinal textures
texture<float, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefFloat;
texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefUChar;
// Define size of (row major) textures
size_t const WIDTH = 1000;
size_t const HEIGHT = 1000;
size_t const TOT_PIX = WIDTH*HEIGHT;
int main(void)
{
// Set texel formats
cudaChannelFormatDesc descFloat = cudaCreateChannelDesc<float>();
cudaChannelFormatDesc descUChar = cudaCreateChannelDesc<uchar>();
// Choose to perform texture 2D linear interpolation
texRefFloat.filterMode = cudaFilterModeLinear;
texRefUChar.filterMode = cudaFilterModeLinear;
// Allocate texture device memory
float * d_buffFloat; cudaMalloc(&d_buffFloat, sizeof(float)*TOT_PIX);
uchar * d_buffUChar; cudaMalloc(&d_buffUChar, sizeof(uchar)*TOT_PIX);
// Bind texture references to textures
cudaError_t errFloat = cudaSuccess;
cudaError_t errUChar = cudaSuccess;
errFloat = cudaBindTexture2D(0, texRefFloat, d_buffFloat, descFloat,
WIDTH, HEIGHT, sizeof(float)*WIDTH);
// Uncomment the following two lines for an error
//errUChar = cudaBindTexture2D(0, texRefUChar, d_buffUChar, descUChar,
// WIDTH, HEIGHT, sizeof(uchar)*WIDTH);
// Check for errors during binding
if (errFloat != cudaSuccess)
{
printf("Error binding float texture reference: %s\n",
cudaGetErrorString(errFloat));
exit(-1);
}
if (errUChar != cudaSuccess)
{
printf("Error binding unsigned char texture reference: %s\n",
cudaGetErrorString(errUChar));
exit(-1);
}
return 0;
}
非常感谢任何帮助/见解!
亚伦
答案 0 :(得分:7)
纹理的每一行都必须正确对齐。如果将纹理绑定到普通数组(而不是CUDA数组),则通常无法保证这一点。要将普通内存绑定到2D纹理,您需要使用cudaMallocPitch()
分配内存。这设置行间距使其适合于绑定到纹理。请注意,将0作为第一个参数传递给纹理绑定API调用并不是一种好习惯。此参数用于CUDA向应用程序返回偏移量。如果偏移量不为零,则需要在纹理访问期间将其添加到纹理坐标。
这是一个快速示例,演示如何从元素为unsigned char
的纹理中读取插值。
#include <stdlib.h>
#include <stdio.h>
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR() \
do { \
/* Check synchronous errors, i.e. pre-launch */ \
cudaError_t err = cudaGetLastError(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
/* Check asynchronous errors, i.e. kernel failed (ULF) */ \
err = cudaThreadSynchronize(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;
__global__ void kernel (int m, int n, float shift_x, float shift_y)
{
float val;
for (int row = 0; row < m; row++) {
for (int col = 0; col < n; col++) {
val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
printf ("%.2f ", val);
}
printf ("\n");
}
}
int main (void)
{
int m = 4; // height = #rows
int n = 3; // width = #columns
size_t pitch, tex_ofs;
unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
unsigned char *arr_d = 0;
CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
tex.normalized = false;
tex.filterMode = cudaFilterModeLinear;
CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
n, m, pitch));
if (tex_ofs !=0) {
printf ("tex_ofs = %zu\n", tex_ofs);
return EXIT_FAILURE;
}
printf ("reading array straight\n");
kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in x-direction\n");
kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in y-direction\n");
kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
CUDA_SAFE_CALL (cudaDeviceSynchronize());
CUDA_SAFE_CALL (cudaFree (arr_d));
return EXIT_SUCCESS;
}
该程序的输出如下:
reading array straight
0.04 0.05 0.05
0.08 0.09 0.09
0.12 0.13 0.13
0.98 0.99 0.99
reading array shifted in x-direction
0.05 0.05 0.05
0.08 0.09 0.09
0.12 0.13 0.13
0.99 0.99 0.99
reading array shifted in y-direction
0.06 0.07 0.07
0.10 0.11 0.11
0.55 0.56 0.56
0.98 0.99 0.99