请查看以下代码,它在内核中做了一个简单的字符指针赋值:
// Question's first kernel: scans the first 11 bytes of gpuHello for a space
// and tries to hand back a pointer to the character that follows it.
// NOTE(review): finalPoint is a by-value parameter, so the assignment on the
// last line only changes this kernel's local copy; nothing is written through
// it and the caller's pointer is not updated.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
// Left uninitialized; it only receives a value if a space is found.
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
// Address of the character right after the space.
temp = &gpuHello[i+1];
found = true;
break;
}
}
// sth is assigned here but never read afterwards.
bool sth = found;
// Overwrites the local parameter only.
finalPoint = temp;
}
// Question's first host program: copies "Hello World" to the device, launches
// the kernel with one thread, and prints what it expects the kernel to have
// located. None of the CUDA calls has its return status checked.
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
// 11 bytes: the characters of "Hello World" without a terminating '\0'.
cudaMalloc((void**)&gpuHello, 11 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 11 * sizeof(char), cudaMemcpyHostToDevice);
// Never initialized on the host; passed to the kernel by value below.
char* didItFind;
// Room for 5 characters only, with no byte for a terminator.
char* whatIsIt = (char*)malloc(5 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
// NOTE(review): didItFind still holds its uninitialized host value here, so
// it is not a valid device source address for this copy.
cudaMemcpy(whatIsIt,didItFind, 5 * sizeof(char), cudaMemcpyDeviceToHost);
// whatIsIt is streamed as a C string although nothing terminates it.
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
我真的不明白,当我打印 whatIsIt 时,为什么输出的不是 "World",而是一些随机字符串。
编辑:在有人指出缺少字符串结束符(空字符)之后更新的版本:
// Question's second kernel: scans the first 11 bytes of gpuHello for a space.
// NOTE(review): finalPoint is still a by-value parameter, so the assignment on
// the last line only changes this kernel's local copy; the caller's pointer is
// not updated.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
// Left uninitialized; it only receives a value if a space is found.
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
// NOTE(review): this stores the start of the string, not the address of
// the character after the space as the first version did.
temp = gpuHello;
found = true;
break;
}
}
// sth is assigned here but never read afterwards.
bool sth = found;
// Overwrites the local parameter only.
finalPoint = temp;
}
// Question's second host program: same flow as the first version, with the
// buffer sizes grown by one byte. None of the CUDA calls has its return status
// checked.
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
// 12 bytes: the 11 characters plus the '\0' that c_str() provides.
cudaMalloc((void**)&gpuHello, 12 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 12 * sizeof(char), cudaMemcpyHostToDevice);
// Never initialized on the host; passed to the kernel by value below.
char* didItFind;
// 6 bytes: room for 5 characters plus a terminator.
char* whatIsIt = (char*)malloc(6 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
// NOTE(review): didItFind still holds its uninitialized host value here, so
// it is not a valid device source address for this copy.
cudaMemcpy(whatIsIt,didItFind, 6 * sizeof(char), cudaMemcpyDeviceToHost);
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
答案 0 :(得分:2)
如果希望内核按您预期的方式工作,就必须以引用方式(即传入指向指针的指针)传递 finalPoint,而不是按值传递。例如:
#include <cstdio>
#include <iostream>
#include <string>
using namespace std;
// Finds the first space within the first 11 bytes of gpuHello and stores,
// through finalPoint, a pointer to the character immediately after it.
//
// gpuHello   - string to scan; up to 11 bytes are read from it.
// finalPoint - location that receives the result. It is a pointer to a
//              pointer so that the write is visible outside the kernel.
//
// If no space is found, NULL is stored. The kernel does not use any thread or
// block index, so every launched thread performs the same scan and write.
__global__ void seehowpointerwork(char * gpuHello, char ** finalPoint){
    // Start from NULL so that a string without a space yields a well-defined
    // result instead of an uninitialized pointer value.
    char* temp = NULL;
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            // Address of the character right after the space.
            temp = &gpuHello[i+1];
            break;
        }
    }
    *finalPoint = temp;
}
// Reports a failed CUDA runtime call and, by default, terminates the program.
//
// code  - status returned by the CUDA call being checked.
// file  - source file name of the call site (the macro passes __FILE__).
//         Declared const because __FILE__ expands to a string literal.
// line  - line number of the call site (the macro passes __LINE__).
// abort - when true (the default), exit using the error code as exit status.
inline void gpuAssert(cudaError_t code, const char *file, int line,
                      bool abort=true)
{
    if (code != cudaSuccess) {
        // Diagnostics go to stderr so they do not mix with program output.
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
// Wraps a CUDA runtime call and checks its status. The do/while(0) form makes
// the macro expand to a single statement, so it is safe in an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
// Copies "Hello World" to the device, launches the kernel with a single
// thread to locate the text after the space, copies that text back, prints
// it, and releases the device allocations. Every CUDA call is checked.
int main()
{
    string hello = "Hello World";

    // Device copy of the 11 characters (no terminator is copied).
    char* gpuHello;
    gpuErrchk( cudaMalloc((void**)&gpuHello, 11 * sizeof(char)) );
    gpuErrchk( cudaMemcpy(gpuHello, hello.data(), 11 * sizeof(char), cudaMemcpyHostToDevice) );

    // didItFinda: device-side slot the kernel writes its result pointer into.
    // didItFindb: host-side copy of that pointer value.
    char ** didItFinda, * didItFindb;
    gpuErrchk( cudaMalloc((void **)&didItFinda, sizeof(char *)) );

    // 5 characters of "World" plus one zero byte, so the buffer is a valid
    // C string when streamed to cout.
    char whatIsIt[6] = {0};

    seehowpointerwork<<<1,1>>>(gpuHello, didItFinda);
    gpuErrchk( cudaPeekAtLastError() );

    // First fetch the pointer value the kernel stored, then use that value as
    // the device source address for the actual characters.
    gpuErrchk( cudaMemcpy(&didItFindb, didItFinda, sizeof(char *), cudaMemcpyDeviceToHost) );
    gpuErrchk( cudaMemcpy(whatIsIt, didItFindb, 5 * sizeof(char), cudaMemcpyDeviceToHost) );

    cout<<"The pointer points to : " << whatIsIt << endl;

    gpuErrchk( cudaFree(didItFinda) );
    gpuErrchk( cudaFree(gpuHello) );
    return 0;
}
编译并运行后,此版本的输出如下:
$ nvcc -arch=sm_12 -Xptxas="-v" programmer.cu
ptxas info : Compiling entry function '_Z17seehowpointerworkPcPS_' for 'sm_12'
ptxas info : Used 4 registers, 8+16 bytes smem, 8 bytes cmem[1]
$ ./a.out
The pointer points to : World
在您的原始代码中,设备到主机的拷贝会失败,因为 didItFind 不是有效的设备指针——您是按值将它传给内核的,因此内核无法修改主机端的这个值。上面的代码包含了足以发现此类问题的错误检查——您应该始终检查每个 API 调用的返回状态。