Question

我有一个非常复杂的程序，为了让问题易于理解，我对它做了简化：现在有2个源文件和1个头文件：time_analysis.cu、DSMC_kernel_float.cu 和 DSMC_kernel_float.h。

这是 time_analysis.cu

#include <cstdlib>
#include <cstdio>
#include <algorithm>
#include <math.h>
#include <cutil.h>
#include <stdio.h>
#include <assert.h>
#include <memory.h>
#include <string.h>
#include <time.h>
#include <cuda_gl_interop.h>
#include <cutil_math.h>
#include "math_constants.h"
#include "vector_types.h"
#include "vector_functions.h"

// Plain aggregate with a single integer member, `seme`.
typedef struct { int seme; } iniran;

// Record type exchanged between host and device in this example:
// two integer members followed by three float members, in that order.
typedef struct {
    int   jp1, jp2;
    float kx, ky, kz;
} stato_struct;

// File-scope buffer pointers, all initially null. main uses coll_CPU and
// coll2dev as host arrays and passes coll_GPU to cudaMalloc / the kernel.
stato_struct *coll_CPU = 0, *coll2dev = 0, *coll_GPU = 0;

#include "DSMC_kernel_float.h"

//==============================================================
// Host driver: fills a small array of stato_struct records, copies it to the
// device, launches CollisioniGPU on it and copies the result back.
// Returns 0 on success, 1 if a host allocation fails.
int main(void){
    // Threads per block for the launch below.
    int N_thread = 4;
    // Number of records in every buffer. Indices 0..4 are written below (and
    // by CollisioniGPU), so five elements are required.
    const int N_elem = 5;
    const size_t n_bytes = N_elem*sizeof(stato_struct);
    int ind;

    // Allocate both host buffers BEFORE touching them: the globals start out
    // as null pointers. calloc zero-fills, so the members that are not set
    // explicitly below have a defined value when they are copied.
    coll_CPU=(stato_struct*) calloc(N_elem, sizeof(stato_struct));
    coll2dev=(stato_struct*) calloc(N_elem, sizeof(stato_struct));
    if(coll_CPU==0 || coll2dev==0){
        fprintf(stderr, "Host allocation failed\n");
        free(coll_CPU);   // free(NULL) is a no-op
        free(coll2dev);
        coll_CPU=0;
        coll2dev=0;
        return 1;
    }

    coll_CPU[0].jp1= 0;
    coll_CPU[1].jp2= 1;
    coll_CPU[2].kx= 2;
    coll_CPU[3].ky= 3;
    coll_CPU[4].kz= 4;

    // Stage the records in the buffer that is transferred to the device;
    // the bound matches the allocated element count exactly.
    for(ind=0;ind<N_elem;ind++){
        coll2dev[ind]=coll_CPU[ind];
    }

    CUDA_SAFE_CALL(cudaMalloc((void**)&coll_GPU, n_bytes));
    CUDA_SAFE_CALL(cudaMemcpy(coll_GPU,coll2dev,n_bytes, cudaMemcpyHostToDevice));

    CollisioniGPU<<<4,N_thread>>>(coll_GPU);
    CUT_CHECK_ERROR("Esecuzione kernel fallita");

    CUDA_SAFE_CALL(cudaMemcpy(coll2dev, coll_GPU, n_bytes,cudaMemcpyDeviceToHost));

    // Release every buffer and reset the globals so no dangling pointer remains.
    free(coll2dev);
    coll2dev=0;
    CUDA_SAFE_CALL(cudaFree(coll_GPU));
    coll_GPU=0;

    free(coll_CPU);
    coll_CPU=0;

    return 0;
}

这是 DSMC_kernel_float.cu

// Kernel della DSMC
#include "DSMC_kernel_float.h"

__global__ void CollisioniGPU(stato_struct *coll_GPU){
// Kernel: stores 1 into one member of each of coll_GPU[0] .. coll_GPU[4].
// No thread or block index is used, so every launched thread performs the
// same five stores.
// NOTE(review): vAx, vAy, vAz, tetaAp and phiAp are not among the members
// (jp1, jp2, kx, ky, kz) of the stato_struct shown in time_analysis.cu;
// confirm which structure definition this kernel is meant to use.
// NOTE(review): index 4 is accessed, so coll_GPU must hold at least five
// elements; verify against the caller's allocation.

coll_GPU[0].vAx=1;  
coll_GPU[1].vAy=1;
coll_GPU[2].vAz=1;
coll_GPU[3].tetaAp=1;
coll_GPU[4].phiAp=1;
}

这是 DSMC_kernel_float.h

__global__ void CollisioniGPU(stato_struct* coll_GPU);
// Declaration of the kernel defined in DSMC_kernel_float.cu.
// NOTE(review): this header does not define stato_struct, so the type must
// already be declared in every translation unit before this header is included.

然而，当我在终端中输入 nvcc -I common/inc -rdc=true time_analysis.cu DSMC_kernel_float.cu 时，我收到了一些奇怪的错误信息，我不明白原因：

DSMC_kernel_float.h(1): error: attribute "global" does not apply here

DSMC_kernel_float.h(1): error: incomplete type is not allowed

DSMC_kernel_float.h(1): error: identifier "stato_struct" is undefined

DSMC_kernel_float.h(1): error: identifier "coll_GPU" is undefined

DSMC_kernel_float.cu(4): error: variable "CollisioniGPU" has already been defined

DSMC_kernel_float.cu(4): error: attribute "global" does not apply here

DSMC_kernel_float.cu(4): error: incomplete type is not allowed

DSMC_kernel_float.cu(4): error: expected a ";"

At end of source: warning: parsing restarts here after previous syntax error

8 errors detected in the compilation of "/tmp/tmpxft_00003f1f_00000000-22_DSMC_kernel_float.cpp1.ii".

根据我在互联网上看到的内容，我认为错误是由 struct 引起的，但我不知道该如何解决才能让程序正常工作；在我的其他示例中，同样的写法似乎没有问题，__global__ 属性怎么会“不适用于此处”呢？

注意：common/inc 是 Nvidia 提供的文件夹，用于正确编译 CUDA 程序。

Answer 1

关于这句话：

注意：common/inc 是 Nvidia 提供的文件夹，用于正确编译 CUDA 程序。

这种说法并不准确。所引用的文件（cutil.h 和 cutil_math.h）和宏（例如 CUT_CHECK_ERROR）是在相当旧的 CUDA 版本（CUDA 5.0 之前）中，作为当时随工具包一起提供的 CUDA 示例代码的一部分发布的。它们并不是“正确编译 CUDA 程序”所必需的。此外，应当将它们视为已弃用（请参阅 CUDA 5.0 toolkit release notes）。如果您确实还在使用这么旧的工具包，我建议升级到较新的版本。

关于编译问题，正如@talonmies所指出的那样，当编译任何不包含定义的模块（无论是直接的还是包含的）时，编译器无法知道stato_struct的定义是什么。这就是你的DSMC_kernel_float.cu模块的情况，这是所有编译错误的来源。

乍一看，一个合理的解决方法是：把包含 stato_struct 定义的 typedef 从 time_analysis.cu 文件移到头文件（DSMC_kernel_float.h）中，并把 #include "DSMC_kernel_float.h" 语句移到 time_analysis.cu 文件的顶部，与其他 #include 放在一起。

但是，您的 DSMC_kernel_float.cu 文件似乎认为 stato_struct 还有另外一些成员：

__global__ void CollisioniGPU(stato_struct *coll_GPU){
// Quoted from DSMC_kernel_float.cu: each store below uses a member name
// (vAx, vAy, vAz, tetaAp, phiAp) that the stato_struct definition does not declare.

coll_GPU[0].vAx=1;  
coll_GPU[1].vAy=1;
coll_GPU[2].vAz=1;
coll_GPU[3].tetaAp=1;
coll_GPU[4].phiAp=1;
}

而这些成员并不属于您当前的 stato_struct 定义：

// Quoted from time_analysis.cu: the only members declared are
// jp1, jp2 (int) and kx, ky, kz (float).
typedef struct{
int jp1;
int jp2;
float kx;
float ky;
float kz;

} stato_struct;

所以这段代码令人困惑，我认为其他人无法替您解决这个问题。您要么需要两个名称不同的独立结构定义，要么需要修改 stato_struct 的定义，使其包含这些成员（.vAx、.vAy、.vAz、.tetaAp、.phiAp）。

对这个结构定义的（错误）处理以及由此产生的编译错误都与 CUDA 无关，而是由 C/C++ 语言本身的规则决定的。

CUDA C 无法识别 __global__ 函数

1 个答案: