Question

请看下面的代码，它执行的是一个简单的 char 指针赋值：

// Kernel from the question (first version).
// Scans the first 11 bytes of gpuHello for a space and, when one is found,
// remembers the address of the byte that follows it.
//   gpuHello   - pointer whose first 11 bytes are read
//   finalPoint - pointer received BY VALUE; assigning to it only changes this
//                kernel's own copy of the argument
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){

    // temp keeps an indeterminate value if the loop below never finds a space.
    char* temp;
    bool found = false;
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            // Address of the character immediately after the space.
            temp = &gpuHello[i+1];
            found = true;

            break;
        }
    }
    // sth is a local copy of found that is never read afterwards.
    bool sth = found;
    // Overwrites the local parameter copy only; the caller's pointer variable
    // is not touched by this assignment.
    finalPoint = temp;

}
// Host driver from the question (first version): uploads "Hello World",
// launches the kernel with one block of one thread, then tries to copy
// 5 bytes back to the host and print them.
// None of the CUDA return codes are checked in this function.
int main()
{
    // Testing one concept;
    string hello = "Hello World";
    char* gpuHello;
    // 11 bytes cover the visible characters only; the terminating '\0' that
    // c_str() provides is not copied.
    cudaMalloc((void**)&gpuHello, 11 * sizeof(char));
    cudaMemcpy(gpuHello, hello.c_str(), 11 * sizeof(char), cudaMemcpyHostToDevice);
    // didItFind is never assigned a value on the host.
    char* didItFind;
    // 5 bytes leave no room for a terminating '\0'.
    char* whatIsIt = (char*)malloc(5 * sizeof(char));
    // didItFind is passed by value, so the launch cannot change this variable.
    seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
    // Uses the still-uninitialized didItFind as the device source address.
    cudaMemcpy(whatIsIt,didItFind, 5 * sizeof(char), cudaMemcpyDeviceToHost);
    // operator<< treats whatIsIt as a C string and keeps reading until it
    // meets a '\0', which this buffer is not guaranteed to contain.
    cout<<"The pointer points to : " << whatIsIt;
    return 0;
}

我真的不明白，为什么打印 whatIsIt 时输出的不是预期的 “World”，而是一些随机字符。

编辑：在有人指出空字符（字符串结束符 '\0'）的问题之后，更新后的版本如下：

// Kernel from the question (updated version).
// Scans the first 11 bytes of gpuHello for a space and, when one is found,
// stores the buffer's start address in temp.
//   gpuHello   - pointer whose first 11 bytes are read
//   finalPoint - pointer received BY VALUE; assigning to it only changes this
//                kernel's own copy of the argument
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){

    // temp keeps an indeterminate value if the loop below never finds a space.
    char* temp;
    bool found = false;
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            // Points at the beginning of the buffer, not at the character
            // that follows the space.
            temp = gpuHello;
            found = true;

            break;
        }
    }
    // sth is a local copy of found that is never read afterwards.
    bool sth = found;
    // Overwrites the local parameter copy only; the caller's pointer variable
    // is not touched by this assignment.
    finalPoint = temp;

}
// Host driver from the question (updated version): uploads "Hello World"
// including its terminator, launches the kernel with one block of one thread,
// then tries to copy 6 bytes back to the host and print them.
// None of the CUDA return codes are checked in this function.
int main()
{
    // Testing one concept;
    string hello = "Hello World";
    char* gpuHello;
    // 12 bytes = the 11 visible characters plus the '\0' supplied by c_str().
    cudaMalloc((void**)&gpuHello, 12 * sizeof(char));
    cudaMemcpy(gpuHello, hello.c_str(), 12 * sizeof(char), cudaMemcpyHostToDevice);
    // didItFind is never assigned a value on the host.
    char* didItFind;
    // 6 bytes: room for five characters plus a terminator; malloc does not
    // initialize them.
    char* whatIsIt = (char*)malloc(6 * sizeof(char));
    // didItFind is passed by value, so the launch cannot change this variable.
    seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
    // Uses the still-uninitialized didItFind as the device source address.
    cudaMemcpy(whatIsIt,didItFind, 6 * sizeof(char), cudaMemcpyDeviceToHost);
    // operator<< treats whatIsIt as a C string and keeps reading until it
    // meets a '\0'.
    cout<<"The pointer points to : " << whatIsIt;
    return 0;
}

Answer 1

如果要让内核按您期望的方式工作，就必须以“引用”的方式传递 finalPoint（在这里即传入指向该指针的指针 char **），而不是按值传递。例如可以这样写：

#include <cstdio>
#include <iostream>
#include <string>

using namespace std;

/*
 * Locates the first space in the first 11 bytes of gpuHello and publishes the
 * address of the character that follows it.
 *
 *   gpuHello   - device pointer; its first 11 bytes are read.
 *   finalPoint - device pointer to a char* slot. On return the slot holds the
 *                address of the byte after the first space, or NULL when no
 *                space is found or the space is the last scanned byte.
 *
 * The kernel does not use threadIdx/blockIdx: every launched thread performs
 * the same scan and stores the same value, so a <<<1,1>>> launch is enough.
 */
__global__ void seehowpointerwork(char * gpuHello, char ** finalPoint){

    const int scanLength = 11;  // number of bytes of gpuHello that are examined

    // Start from NULL so the host never receives an indeterminate pointer
    // when the scan does not find a usable space.
    char* temp = NULL;
    for(int i = 0 ; i < scanLength; i++){
        if(gpuHello[i] == ' '){
            // Only report a successor that still lies inside the scanned range.
            if(i + 1 < scanLength){
                temp = &gpuHello[i+1];
            }
            break;
        }
    }
    // Write through the pointer-to-pointer so the result outlives the kernel.
    *finalPoint = temp;
}

/*
 * Reports a failed CUDA runtime call and optionally terminates the program.
 *
 *   code  - status returned by the CUDA runtime call being checked
 *   file  - source file name to show in the message (normally __FILE__)
 *   line  - source line number to show in the message (normally __LINE__)
 *   abort - when true (the default) the process exits with `code` as status
 *
 * Does nothing when code == cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line,
                 bool abort=true)
{
   if (code != cudaSuccess) {
      // `file` is const char* because __FILE__ expands to a string literal.
      // Diagnostics go to stderr so they are not mixed into program output.
      fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}
// Wrap any CUDA runtime call: gpuErrchk( cudaMalloc(...) );
// The do/while(0) form makes the macro behave as a single statement, so it is
// safe inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/*
 * Host driver: copies "Hello World" to the device, lets the kernel locate the
 * character after the first space, then copies the five characters starting
 * there back to the host and prints them.
 *
 * Every CUDA runtime call is wrapped in gpuErrchk, and both device
 * allocations are released before returning.
 */
int main()
{
    const string hello = "Hello World";
    const size_t helloLen = hello.size();  // 11 characters, terminator not uploaded
    const size_t wordLen  = 5;             // characters copied back and printed

    char* gpuHello = NULL;
    gpuErrchk( cudaMalloc((void**)&gpuHello, helloLen * sizeof(char)) );
    gpuErrchk( cudaMemcpy(gpuHello, hello.data(), helloLen * sizeof(char), cudaMemcpyHostToDevice) );

    // didItFinda: device-side slot the kernel writes its result pointer into.
    // didItFindb: host-side copy of that device pointer value.
    char** didItFinda = NULL;
    char*  didItFindb = NULL;
    gpuErrchk( cudaMalloc((void **)&didItFinda, sizeof(char *)) );

    // One extra zero-initialised byte keeps the buffer NUL-terminated, so
    // streaming it as a C string cannot read past its end.
    char whatIsIt[wordLen + 1] = {0};

    seehowpointerwork<<<1,1>>>(gpuHello, didItFinda);
    gpuErrchk( cudaPeekAtLastError() );

    // First fetch the pointer value the kernel produced, then use it as the
    // device source address for the actual characters.
    gpuErrchk( cudaMemcpy(&didItFindb, didItFinda, sizeof(char *), cudaMemcpyDeviceToHost) );
    gpuErrchk( cudaMemcpy(whatIsIt, didItFindb, wordLen * sizeof(char), cudaMemcpyDeviceToHost) );
    cout<<"The pointer points to : " << whatIsIt << endl;

    gpuErrchk( cudaFree(didItFinda) );
    gpuErrchk( cudaFree(gpuHello) );
    return 0;
}

编译并运行后，此版本的输出如下：

$ nvcc -arch=sm_12 -Xptxas="-v" programmer.cu 
ptxas info    : Compiling entry function '_Z17seehowpointerworkPcPS_' for 'sm_12'
ptxas info    : Used 4 registers, 8+16 bytes smem, 8 bytes cmem[1]

$ ./a.out 
The pointer points to : World

在您目前的代码中，从设备到主机的拷贝会失败，因为 didItFind 不是有效的设备指针——您是按值把它传给内核的，因此主机上的这个指针值无法被内核修改。上面的代码包含了足以发现此类问题的错误检查——您应该始终检查每个 API 调用的返回状态。

简单的 char 赋值在 CUDA 中不起作用

1 个答案: