我在从GPU的设备内存中读取时遇到问题。当我将值复制到__device__
内存时,一切正常!
但是当我试图读回结果时,结果有时是正确的,有时却恰好是数组最初的值(也就是内核修改之前的值)!
我有一个像这样的设备数组:
__device__ array[50];
一开始我将一些值复制到:
cudaStatus = cudaMemcpyToSymbol(dev_state, &CipherState, statesize, 0, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
printf(" \n%s\n", cudaGetErrorString(cudaStatus));
getchar();
}
在内核中进行一些更改后, 我尝试从数组中读取值:
Kernel << <8, 16 >> >();
unsigned char CipherState2[50];
cudaStatus = cudaMemcpyFromSymbol(&CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)
{
printf(" \n%s\n", cudaGetErrorString(cudaStatus));
getchar();
}
结果有时是正确的,有时却是数组最初的值。
以下是我的更多代码:
// Declared at file scope, before the Kernel function body.
// 128-byte buffer placed in device memory by the __device__ qualifier; the
// host code in test() reaches it by symbol through cudaMemcpyToSymbol and
// cudaMemcpyFromSymbol.
__device__ unsigned char dev_state[128];
//////////////////////////////////////
// Round-trips a 128-byte buffer through the __device__ array dev_state:
// fills a host buffer with 0x01, uploads it, launches Kernel, downloads the
// buffer again and prints the first 16 bytes before and after the launch.
//
// Every CUDA status is checked. On failure the error string is printed and
// the function waits for a key press, then continues (it does not abort).
void test()
{
    unsigned char CipherState[128];
    for (int i = 0; i < 128; i++)
        CipherState[i] = 0x01;

    cudaError_t cudaStatus;

    // Upload the host buffer into the device symbol.
    cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, 128*sizeof(unsigned char), 0, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n initialized:\n 0x");
    // NOTE: the outer loop runs exactly once (bound 16, step 16), so only the
    // first 16 of the 128 bytes are printed. Raise the bound to 128 to dump
    // the whole buffer.
    for (size_t i = 0; i < 16; i += 16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j = 0; j <= 15; j++)
        {
            printf("%x", CipherState[i+j]);
        }
    }

    // Kernel is expected to set every byte of dev_state to 0x05.
    Kernel<<<8, 16>>>();

    // A kernel launch returns no status of its own; a rejected launch is only
    // visible through cudaGetLastError(). Without this check a kernel that
    // never ran goes unnoticed, and the copy below simply returns the 0x01
    // bytes uploaded above.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    // Download the (supposedly modified) device buffer.
    unsigned char CipherState2[128];
    cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, 128*sizeof(unsigned char), 0, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess)
    {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n State at the end:\n ");
    // Same single-pass loop as above: only the first 16 bytes are shown.
    for (size_t i = 0; i < 16; i += 16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j = 0; j <= 15; j++)
            printf("%x", CipherState2[i + j]);
    }
}
有时,打印cipherstate2得到这个:
0x55555555555555555 ...... 5555555555
有时候:
0x11111111111111111 ..... 11111111111;
答案 0 :(得分:2)
这是不正确的:
unsigned char CipherState2[50];
cudaStatus = cudaMemcpyFromSymbol(&CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
^
CipherState2
已经是一个指针。你不应该再对它取地址。相反,你应该像这样调用:
cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
虽然你没有显示CipherState
变量的样子,但你很可能在这里犯了类似的错误:
cudaStatus = cudaMemcpyToSymbol(dev_state, &CipherState, statesize, 0, cudaMemcpyHostToDevice);
^
该调用的正确形式很可能是:
cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, statesize, 0, cudaMemcpyHostToDevice);
今后提这类问题时,请提供MCVE(最小、完整、可验证的示例)。
举个例子,请注意这不是有效的代码:
__device__ array[50];
也许你的意思是这样的:
__device__ unsigned char dev_state[50];
编辑:您现在发布的代码(在答案中)仍然不完整,但看起来大部分是正确的。剩下的问题可能出在您没有给出的内核中,也可能是您的CUDA安装无法正常工作。下面是一份围绕您所展示的内容编写的完整可运行代码(我添加了一个简单的内核),用来演示预期的行为(请注意,我认为用于打印输出的for循环写得并不正确):
$ cat t966.cu
#include <stdio.h>
// Declared at file scope, before the Kernel function body.
// 128-byte buffer placed in device memory by the __device__ qualifier. Kernel
// writes to it directly; the host code in test() reaches it by symbol through
// cudaMemcpyToSymbol and cudaMemcpyFromSymbol.
__device__ unsigned char dev_state[128];
//////////////////////////////////////
// Writes 0x5 into dev_state, one byte per thread.
// Each thread computes its flat global index from a 1D launch; threads whose
// index falls outside the 128-element array return without writing.
__global__ void Kernel()
{
    const int globalThread = blockIdx.x * blockDim.x + threadIdx.x;
    if (globalThread >= 128)
        return;
    dev_state[globalThread] = 0x5;
}
// Round-trips a 128-byte buffer through the __device__ array dev_state:
// fills a host buffer with 0x01, uploads it, launches Kernel (which writes
// 0x5 into every element), downloads the buffer again and prints the first
// 16 bytes before and after the launch.
//
// Every CUDA status is checked. On failure the error string is printed and
// the function waits for a key press, then continues (it does not abort).
void test()
{
    unsigned char CipherState[128];
    for (int i = 0; i < 128; i++)
        CipherState[i] = 0x01;

    cudaError_t cudaStatus;

    // Upload the host buffer into the device symbol.
    cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, 128*sizeof(unsigned char), 0, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n initialized:\n 0x");
    // NOTE: the outer loop runs exactly once (bound 16, step 16), so only the
    // first 16 of the 128 bytes are printed. Raise the bound to 128 to dump
    // the whole buffer.
    for (size_t i = 0; i < 16; i += 16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j = 0; j <= 15; j++)
        {
            printf("%x", CipherState[i+j]);
        }
    }

    // 8 blocks x 16 threads = 128 threads, one per byte of dev_state.
    Kernel<<<8, 16>>>();

    // A kernel launch returns no status of its own; a rejected launch is only
    // visible through cudaGetLastError(). Without this check a kernel that
    // never ran goes unnoticed, and the copy below simply returns the 0x01
    // bytes uploaded above.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    // Download the modified device buffer.
    unsigned char CipherState2[128];
    cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, 128*sizeof(unsigned char), 0, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess)
    {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n State at the end:\n ");
    // Same single-pass loop as above: only the first 16 bytes are shown.
    for (size_t i = 0; i < 16; i += 16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j = 0; j <= 15; j++)
            printf("%x", CipherState2[i + j]);
    }
    printf("\n");
}
// Program entry point: runs the dev_state round-trip demo once and exits
// with status 0.
int main()
{
    test();
    return 0;
}
$ nvcc t966.cu -o t966
$ cuda-memcheck ./t966
========= CUDA-MEMCHECK
initialized:
0x
0x1111111111111111
State at the end:
0x5555555555555555
========= ERROR SUMMARY: 0 errors
$