Question

我有一个文本字符数组，里面填充了文本文件中的单词。

例如
text[0] = "one";
text[1] = "two";
等等。

数组元素的个数等于线程数。我想使用CUDA将文本数组的全部内容打印到屏幕上。我尝试使用以下代码进行打印，但它不起作用。我不清楚如何将像text[][]这样的二维数组传递给CUDA内核函数。

#define MAX_SIZE 100   
#define elements 20 

// Kernel from the question: each thread tries to print one entry of d_text.
// NOTE(review): d_text is a char*, so d_text[idx] is a single char, yet it is
// passed to a %s conversion, which expects a pointer to a string.
// NOTE(review): the format string ends in "/n" (slash + 'n'); presumably the
// newline escape "\n" was intended.
__global__ void calculate(char *d_text) {
  int idx = threadIdx.x;  // this thread's index within its block
  printf("test %s /n", d_text[idx]);
}

// Host driver from the question: copies a 2D char array to the device and
// launches `calculate` with one block of `elements` threads.
// NOTE(review): d_data, DATA_BYTES and STRING_BYTES are not declared anywhere
// in this snippet.
// NOTE(review): cudaMalloc targets d_data, so d_text is still uninitialised
// when it is used as the cudaMemcpy destination and as the kernel argument.
int main() {
  char text[MAX_SIZE][MAX_SIZE]; // host-side table of words, one word per row
  char *d_text;

  cudaMalloc((void **)&d_data, DATA_BYTES);

  cudaMemcpy(d_text, text, STRING_BYTES, cudaMemcpyHostToDevice);

  calculate << < 1, elements >> > (d_text);

  // Block the host until the kernel has finished.
  cudaDeviceSynchronize();
}

Answer 1

在主机和设备之间移动数据时，CUDA中的多维数组通常需要“深度复制”操作。但是，在这种情况下，我们可以利用数组宽度固定为MAX_SIZE（在编译时已知）这一事实。我们可以定义一个具有该宽度的类型，使2D数组的处理与1D数组的处理一样简单：

$ cat t426.cu
#include <stdio.h>

const char s1[] = "one\0";
const char s2[] = "two\0";
const int MAX_SIZE = 10;
typedef char carr[MAX_SIZE];

// Prints one word per thread.
//   text: device pointer to an array of `carr` rows (each row is a
//         fixed-width char[MAX_SIZE] buffer holding a NUL-terminated word).
// The row is selected from threadIdx.x only, so the kernel expects a
// single-block launch with one thread per row to print.
__global__ void calculate(carr *text) {
  const unsigned int row = threadIdx.x;
  // text[row] is a char[MAX_SIZE]; it decays to the char* that %s requires.
  const char *word = text[row];
  printf("test %s \n", word);
}

// Host driver: fills the first two rows of a fixed-width 2D char array,
// copies the whole array to the device in a single flat cudaMemcpy (possible
// because every row has the compile-time width MAX_SIZE), and launches one
// thread per filled row to print it.
// Returns 0 on success and 1 if any CUDA call or the kernel reports an error.
int main() {
  // Zero-initialise so rows that are never filled are not copied to the
  // device as indeterminate bytes.
  char text[MAX_SIZE][MAX_SIZE] = {};
  memcpy(text[0], s1, sizeof(s1));
  memcpy(text[1], s2, sizeof(s2));
  const int num_words = 2;  // only rows 0 and 1 hold words
  carr *d_text = NULL;

  cudaError_t err = cudaMalloc((void **)&d_text, sizeof(text));
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  err = cudaMemcpy(d_text, text, sizeof(text), cudaMemcpyHostToDevice);

  if (err == cudaSuccess) {
    calculate<<<1, num_words>>>(d_text);
    // A kernel launch returns nothing; configuration errors are reported
    // through cudaGetLastError().
    err = cudaGetLastError();
  }

  if (err == cudaSuccess) {
    // Waits for the kernel, flushes device-side printf output, and surfaces
    // any fault that occurred while the kernel was running.
    err = cudaDeviceSynchronize();
  }

  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
  }

  // Release the device buffer on both the success and the failure path.
  cudaFree(d_text);
  return err == cudaSuccess ? 0 : 1;
}
$ nvcc -arch=sm_61 -o t426 t426.cu
$ cuda-memcheck ./t426
========= CUDA-MEMCHECK
test one
test two
========= ERROR SUMMARY: 0 errors
$

有关CUDA中2D或多维数组的更一般处理，请参阅这个回答。

如何从CUDA内核代码中的char数组元素中打印单词？

1 个答案: