为线性插值设置CUDA 2D“unsigned char”纹理

时间:2013-06-12 21:24:32

标签: cuda textures interpolation

我有一个代表2D数组的无符号字符的线性数组。我想将它放入CUDA 2D纹理并对其执行(浮点)线性插值,即,使纹理调用获取4个最近的无符号字符邻居,在内部将它们转换为float,在它们之间进行插值,并返回结果浮点值。

我在设置纹理并将其绑定到纹理参考时遇到一些困难。我已经阅读了CUDA参考手册&附录,但我没有运气。

下面是可运行的代码,用于设置和绑定1)浮点纹理和2)unsigned char纹理。浮点代码运行得很好。但是,如果将两个注释的unsigned char行取消注释到底部,则会抛出“invalid argument”错误。

#include <cstdio>
#include <cuda_runtime.h>

typedef unsigned char uchar;

// Define (global) texture references; must use "cudaReadModeNormalizedFloat"
// for ordinal textures
texture<float, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefFloat;
texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefUChar;

// Define size of (row major) textures
size_t const WIDTH  = 1000;
size_t const HEIGHT = 1000;
size_t const TOT_PIX = WIDTH*HEIGHT;

int main(void)
{
   // Set texel formats
   cudaChannelFormatDesc descFloat = cudaCreateChannelDesc<float>();
   cudaChannelFormatDesc descUChar = cudaCreateChannelDesc<uchar>();

   // Choose to perform texture 2D linear interpolation
   texRefFloat.filterMode = cudaFilterModeLinear;
   texRefUChar.filterMode = cudaFilterModeLinear;

   // Allocate texture device memory
   float * d_buffFloat; cudaMalloc(&d_buffFloat, sizeof(float)*TOT_PIX);
   uchar * d_buffUChar; cudaMalloc(&d_buffUChar, sizeof(uchar)*TOT_PIX);

   // Bind texture references to textures
   cudaError_t errFloat = cudaSuccess;
   cudaError_t errUChar = cudaSuccess;

   errFloat = cudaBindTexture2D(0, texRefFloat, d_buffFloat, descFloat,
                  WIDTH, HEIGHT, sizeof(float)*WIDTH);
   // Uncomment the following two lines for an error
   //errUChar = cudaBindTexture2D(0, texRefUChar, d_buffUChar, descUChar,
   //               WIDTH, HEIGHT, sizeof(uchar)*WIDTH);

   // Check for errors during binding
   if (errFloat != cudaSuccess)
   {
      printf("Error binding float texture reference: %s\n",
          cudaGetErrorString(errFloat));
      exit(-1);
   }

   if (errUChar != cudaSuccess)
   {
      printf("Error binding unsigned char texture reference: %s\n",
          cudaGetErrorString(errUChar));
      exit(-1);
   }

   return 0;
}

非常感谢任何帮助/见解!

亚伦

1 个答案:

答案 0 :(得分:7)

纹理的每一行都必须正确对齐。如果将纹理绑定到普通数组(而不是CUDA数组),则通常无法保证这一点。要将普通内存绑定到2D纹理,您需要使用cudaMallocPitch()分配内存。这设置行间距使其适合于绑定到纹理。请注意,将0作为第一个参数传递给纹理绑定API调用并不是一种好习惯。此参数用于CUDA向应用程序返回偏移量。如果偏移量不为零,则需要在纹理访问期间将其添加到纹理坐标。

这是一个快速示例,演示如何从元素为unsigned char的纹理中读取插值。

#include <stdlib.h>
#include <stdio.h>

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;

__global__ void kernel (int m, int n, float shift_x, float shift_y) 
{
    float val;
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
            printf ("%.2f  ", val);
        }
        printf ("\n");
    }
}

int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width  = #columns
    size_t pitch, tex_ofs;
    unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
    unsigned char *arr_d = 0;

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        return EXIT_FAILURE;
    }
    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in x-direction\n");
    kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}

该程序的输出如下:

reading array straight
0.04  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.98  0.99  0.99
reading array shifted in x-direction
0.05  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.99  0.99  0.99
reading array shifted in y-direction
0.06  0.07  0.07
0.10  0.11  0.11
0.55  0.56  0.56
0.98  0.99  0.99