我有一个 C++ 项目,在其中通过一个包装函数调用 CUDA 内核。
我的c ++文件看起来像这样(这是 extern.cc ):
#include "extern.h"
#include "qc/operator.h"
#include "qc/quStates.h"
#include "gpu.h"
...
// Single-qubit operator routine: picks an operator from the one-character id
// of `def` (H, X, Y, Z, S or T), builds the matching opBit on the heap, passes
// it to the GPU wrapper and frees it again.
// NOTE(review): `inv` and `def` are not declared in this block; presumably they
// are provided by the ROUTINE macro -- confirm against its definition.
ROUTINE(ext_bit) {
// NOTE(review): `i` and `qbit` are not used in the visible part of this routine.
int i;
quState *qbit;
// NOTE(review): presumably binds the routine parameter named "q" to a local `q`
// (it is used in the gpucaller call below) -- confirm against the macro.
PAR_QUSTATE(q,"q");
opBit *op;
// Imaginary unit, used for the Y, S and T operators.
tComplex I(0,1);
// Sign factor: -1 when `inv` is set, +1 otherwise; applied to the S and T phases.
tComplex sg= inv ? -1 : 1;
// Operator selector: first character of the id; ids that are not exactly one
// character long are mapped to '?' so they fall into the error branch.
char c=(def->id())[0];
if(def->id().length()!=1) c='?';
switch(c) {
case 'H': op=new opBit(1,1,1,-1,sqrt(0.5)); break;
case 'X': op=new opBit(0,1,1,0); break;
case 'Y': op=new opBit(0,-I,I,0); break;
case 'Z': op=new opBit(1,0,0,-1); break;
case 'S': op=new opBit(1,0,0,sg*I); break;
case 'T': op=new opBit(1,0,0,sqrt(0.5)+sg*sqrt(0.5)*I); break;
// NOTE(review): EXTERR presumably leaves the routine; if it does not, `op` is
// used uninitialized below -- confirm.
case '?':
default: EXTERR("unknown single qubit operator "+def->id());
}
// This is where I call my wrapper function
// the error that I get is: expected primary-expression before ',' token
// NOTE(review): `opBit` is the type name, not the `op` pointer assigned in the
// switch above; this is the line the reported compiler error refers to.
gpucaller(opBit, q);
qcl_delete(op);
return 0;
}
其中“gpucaller”是我调用内核的包装函数,两者都在 cuda_kernel.cu 中定义:
/* compiling with:
nvcc -arch sm_11 -c -I"/home/glu/NVIDIA_GPU_Computing_SDK/C/common/inc" -I"." -I"./qc" -I"/usr/local/cuda/include" -o cuda_kernel.o cuda_kernel.cu
*/
// Include guard for this translation unit; the matching #endif is at the end
// of the file.
#ifndef _CUDA_KERNEL_H_
#define _CUDA_KERNEL_H_
// NOTE(review): not referenced in the visible excerpt; presumably the largest
// number of qubits the GPU path is meant to handle -- confirm.
#define MAX_QUBITS 25
// Threads per block; used for dimBlock in gpucaller.
#define BLOCKDIM 512
// Terms handled per block (two per thread); sizes the shared buffer in qcl1
// and determines the number of blocks in gpucaller.
#define MAX_TERMS_PER_BLOCK (2*BLOCKDIM)
// All bits set except bit 0.
// NOTE(review): not referenced in the visible excerpt.
#define THREAD_MASK (~0ul << 1)
// includes
#include <cutil_inline.h>
#include "gpu.h"
// 2x2 operator matrix in constant memory; filled by gpucaller via
// cudaMemcpyToSymbol before the kernel launch.
__constant__ float devOpBit[2][2];
// Kernel launched by gpucaller on the device copy of the state's term array.
// Most of the body is elided in this excerpt ("..."), so only the declarations
// are documented here.
//   a            device array of complex terms (gpucaller passes a_d)
//   N            number of terms in `a` (gpucaller passes 1 << mapbits())
//   qbCount, blockGrpSize, k
//                NOTE(review): meaning not visible in this excerpt -- confirm
//                against the elided body and the elided set-up code in gpucaller.
__global__ void qcl1(cuFloatComplex *a, int N, int qbCount, int blockGrpSize, int k)
{
//int idx = blockIdx.x * BLOCKDIM + threadIdx.x;
//int tx = threadIdx.x;
cuFloatComplex t0_0, t0_1, t1_0, t1_1;
int x0_idx, x1_idx;
int i, grpSize, b0_idx, b1_idx;
// Per-block shared buffer holding MAX_TERMS_PER_BLOCK complex terms.
__shared__ cuFloatComplex aS[MAX_TERMS_PER_BLOCK];
...
}
/*
 * Host-side wrapper that applies a single-qubit operator to a quantum state
 * on the GPU.
 *
 * Steps: copy the state's term array to the device, copy the operator into
 * the constant-memory symbol devOpBit, launch qcl1, copy the terms back into
 * q->termsarray, then release the device buffer and tear down the context.
 *
 *   op  operator to apply; its device-ready representation is obtained via
 *       getDeviceReadyOpBit()
 *   q   state whose termsarray (1 << mapbits() complex terms) is updated in
 *       place
 *
 * Every CUDA runtime call is wrapped in cutilSafeCall, matching the existing
 * cudaMemcpyToSymbol call, so a failed allocation or copy is reported at the
 * failing call instead of surfacing later as a corrupted result.
 */
void gpucaller(opBit* op, quBaseState* q) {
    // make an operator copy
    // NOTE(review): the return type of getDeviceReadyOpBit() is not visible
    // here. If it really is an array of row pointers (float**), the
    // cudaMemcpyToSymbol below copies pointer values rather than the 2x2
    // float matrix -- confirm it returns contiguous float data.
    float** myOpBit = (float**)op->getDeviceReadyOpBit();
    unsigned int timer = 0;
    cuFloatComplex *a_d;
    // Shift a long, not an int, so the term count is computed at full width.
    long int N = 1L << q->mapbits();
    // Byte count kept in size_t (the type cudaMalloc/cudaMemcpy expect).
    size_t size = sizeof(cuFloatComplex) * N;

    // start timer
    cutilCheckError( cutCreateTimer( &timer));
    cutilCheckError( cutStartTimer( timer));

    // allocate device memory
    cutilSafeCall( cudaMalloc((void**)&a_d, size) );

    // copy host memory to device
    cutilSafeCall( cudaMemcpy(a_d, q->termsarray, size, cudaMemcpyHostToDevice) );

    // copy quantic operator to constant memory
    cutilSafeCall( cudaMemcpyToSymbol(devOpBit, myOpBit, 2*sizeof(float[2]), 0) );
    printf("Cuda errors: %s\n", cudaGetErrorString( cudaGetLastError() ) );

    // setup execution parameters: one block per MAX_TERMS_PER_BLOCK terms,
    // rounded up
    dim3 dimBlock(BLOCKDIM, 1, 1);
    int n_blocks = N/MAX_TERMS_PER_BLOCK + (N%MAX_TERMS_PER_BLOCK == 0 ? 0:1);
    dim3 dimGrid(n_blocks, 1, 1);
    ...
    // execute the kernel
    qcl1<<< dimGrid, dimBlock >>>(a_d, N, gates, blockGrpSize, k);

    // check if kernel execution generated and error
    cutilCheckMsg("Kernel execution failed");
    ...
    // copy result from device to host
    cutilSafeCall( cudaMemcpy(q->termsarray, a_d, size, cudaMemcpyDeviceToHost) );

    // stop timer
    cutilCheckError( cutStopTimer( timer));
    //printf( "GPU Processing time: %f (ms)\n", cutGetTimerValue( timer));
    cutilCheckError( cutDeleteTimer( timer));

    // cleanup memory on device
    cutilSafeCall( cudaFree(a_d) );
    cudaThreadExit();
}
#endif // #ifndef _CUDA_KERNEL_H_
和“ gpu.h ”具有以下内容:
// gpu.h: declares the host-callable wrapper so that plain C++ sources can call
// into the CUDA translation unit without including any .cu file.
#ifndef _GPU_H_
#define _GPU_H_
// Project headers; presumably they declare opBit and quBaseState, which the
// prototype below uses.
#include "qc/operator.h"
// NOTE(review): extern.cc includes this header as "qc/quStates.h" (capital S).
// On a case-sensitive file system only one spelling can match -- confirm.
#include "qc/qustates.h"
// Wrapper defined in cuda_kernel.cu; takes the operator and the state by pointer.
void gpucaller(opBit* op, quBaseState* q);
#endif // #ifndef _GPU_H_
我没有在我的c ++文件中包含.cu文件,我只在c ++和.cu文件中包含.h文件(gpu.h - 包含我的内核调用函数的原型)。
我使用nvcc编译.cu文件,并将生成的.o文件链接到项目的Makefile中。 另外,我没有忘记将“-lcudart”标志添加到Makefile。
问题在于,当我编译我的主项目时,我收到此错误:
expected primary-expression before ',' token
该错误指向 extern.cc 中我调用“gpucaller”函数的那一行。
有谁知道如何做到这一点?
编辑:我又尝试编译了一次,这次把 gpucaller 函数定义中的参数删掉了(调用时自然也不传任何参数;这当然不对,因为我需要传递参数)。这样可以正常编译。所以问题在于 gpucaller 的参数类型没有被识别,但我不知道原因(我已经包含了声明这些参数类型的头文件,即 "qc/operator.h" 和 "qc/quStates.h")。有人有解决方案吗?
我的项目的Makefile是这样的:
# Makefile for qcl, extended with the CUDA object file and runtime library.
# Recipe lines below must start with a TAB character.

VERSION=0.6.3

# Directory for Standard .qcl files

QCLDIR = /usr/local/lib/qcl

# Path for qcl binaries

QCLBIN = /usr/local/bin

ARCH = `g++ -dumpmachine || echo bin`

# Comment out if you want to compile for a different target architecture
# To build libqc.a, you will also have to edit qc/Makefile!

#ARCH = i686-linux
#ARCHOPT = -m32 -march=i686

# Debugging and optimization options

#DEBUG = -g -pg -DQCL_DEBUG -DQC_DEBUG
#DEBUG = -g -DQCL_DEBUG -DQC_DEBUG
DEBUG = -O2 -g -DQCL_DEBUG -DQC_DEBUG
#DEBUG = -O2

# Plotting support
#
# Comment out if you don't have GNU libplotter and X

PLOPT = -DQCL_PLOT
PLLIB = -L/usr/X11/lib -lplotter

# Readline support
#
# Comment out if you don't have GNU readline on your system
# explicit linking against libtermcap or libncurses may be required

RLOPT = -DQCL_USE_READLINE
#RLLIB = -lreadline
RLLIB = -lreadline -lncurses

# Interrupt support
#
# Comment out if your system doesn't support ANSI C signal handling

IRQOPT = -DQCL_IRQ

# Replace with lex and yacc on non-GNU systems (untested)

LEX = flex
YACC = bison
INSTALL = install

##### You shouldn't have to edit the stuff below #####

DATE = `date +"%y.%m.%d-%H%M"`

QCDIR = qc
QCLIB = $(QCDIR)/libqc.a
QCLINC = lib

#CXX = g++
#CPP = $(CC) -E
CXXFLAGS = -c $(ARCHOPT) -Wall $(DEBUG) $(PLOPT) $(RLOPT) $(IRQOPT) -I$(QCDIR) -DDEF_INCLUDE_PATH="\"$(QCLDIR)\""
# The CUDA runtime (-lcudart) is linked in for cuda_kernel.o.
LDFLAGS = $(ARCHOPT) -L$(QCDIR) $(DEBUG) $(PLLIB) -lm -lfl -lqc $(RLLIB) -L"/usr/local/cuda/lib" -lcudart

FILESCC = $(wildcard *.cc)
FILESH = $(wildcard *.h)
SOURCE = $(FILESCC) $(FILESH) qcl.lex qcl.y Makefile

# cuda_kernel.o is produced separately with nvcc; this Makefile only links it.
OBJECTS = types.o syntax.o typcheck.o symbols.o error.o \
          lex.o yacc.o print.o quheap.o extern.o eval.o exec.o \
          parse.o options.o debug.o cond.o dump.o plot.o format.o cuda_kernel.o

all: do-it-all

ifeq (.depend,$(wildcard .depend))
include .depend
do-it-all: build
else
do-it-all: dep
	$(MAKE)
endif

#### Rules for depend

dep: lex.cc yacc.cc yacc.h $(QCLIB)
	for i in *.cc; do \
	  $(CPP) -I$(QCDIR) -MM $$i; \
	done > .depend

lex.cc: qcl.lex yacc.h
	$(LEX) -olex.cc qcl.lex

yacc.cc: qcl.y
	$(YACC) -t -d -o yacc.cc qcl.y

yacc.h: yacc.cc
	mv yacc.*?h yacc.h

$(QCLIB):
	cd $(QCDIR) && $(MAKE) libqc.a

#### Rules for build

build: qcl $(QCLINC)/default.qcl

qcl: $(OBJECTS) qcl.o $(QCLIB)
	$(CXX) $(OBJECTS) qcl.o $(LDFLAGS) -o qcl

$(QCLINC)/default.qcl: extern.cc
	grep "^//!" extern.cc | cut -c5- > $(QCLINC)/default.qcl

checkinst:
	[ -f ./qcl -a -f $(QCLINC)/default.qcl ] || $(MAKE) build

install: checkinst
	$(INSTALL) -m 0755 -d $(QCLBIN) $(QCLDIR)
	$(INSTALL) -m 0755 ./qcl $(QCLBIN)
	$(INSTALL) -m 0644 ./$(QCLINC)/*.qcl $(QCLDIR)

uninstall:
	-rm -f $(QCLBIN)/qcl
	-rm -f $(QCLDIR)/*.qcl
	-rmdir $(QCLDIR)

#### Other Functions

edit:
	nedit $(SOURCE) &

clean:
	rm -f *.o lex.* yacc.*
	cd $(QCDIR) && $(MAKE) clean

clear: clean
	rm -f qcl $(QCLINC)/default.qcl .depend
	cd $(QCDIR) && $(MAKE) clear

dist-src: dep
	mkdir qcl-$(VERSION)
	cp README CHANGES COPYING .depend $(SOURCE) qcl-$(VERSION)
	mkdir qcl-$(VERSION)/qc
	cp qc/Makefile qc/*.h qc/*.cc qcl-$(VERSION)/qc
	cp -r lib qcl-$(VERSION)
	tar czf qcl-$(VERSION).tgz --owner=0 --group=0 qcl-$(VERSION)
	rm -r qcl-$(VERSION)

dist-bin: build
	mkdir qcl-$(VERSION)-$(ARCH)
	cp Makefile README CHANGES COPYING qcl qcl-$(VERSION)-$(ARCH)
	cp -r lib qcl-$(VERSION)-$(ARCH)
	tar czf qcl-$(VERSION)-$(ARCH).tgz --owner=0 --group=0 qcl-$(VERSION)-$(ARCH)
	rm -r qcl-$(VERSION)-$(ARCH)

upload: dist-src
	scp qcl-$(VERSION)*.tgz oemer@tph.tuwien.ac.at:html/tgz

scp: dist-src
	scp qcl-$(VERSION).tgz oemer@tph.tuwien.ac.at:bak/qcl-$(DATE).tgz
我添加到原始Makefile的唯一更改是将“cuda_kernel.o”添加到OBJECTS行,并将“-lcudart”标志添加到LDFLAGS。
更新:感谢 harrism 帮助我。我传递了一个类型作为参数。
答案 0 :(得分:0)
cuda.h
void gpucaller(type1 param1, type2 param2);
`type1` 和 `type2` 是否在某处声明过,使得常规的 C++ 编译器知道这些类型是什么?如果没有,就会出现您所说的那种错误。
答案 1 :(得分:0)
您问题中的代码目前过于庞大和复杂。请尝试把它精简成一个更简单的、能复现失败的示例,做好之后再更新您的问题,这样别人更容易复现。可以删除 CUDA 计时器代码和 `switch case`,并把无关紧要的实现细节替换为 `...` 等。
我用msvc和nvcc编译然后用icl链接;所以如果你能做一个简单的例子,我可以看看它是否用完全不同的编译器设置进行编译。这应该缩小问题范围。
即使把您自己的头文件 `cuda.h` 重命名为 `somethingspecific.h` 并不能解决这个问题,我也认为继续使用 `cuda.h` 这个名字不是一个好主意:它容易引起混淆,也是潜在的问题来源。
答案 2 :(得分:0)
gpucaller(opBit, q);
您把类型名 `opBit` 当作函数实参传入了,这在 C 或 C++ 中都是不合法的。看起来应该改成这样:
gpucaller(op, q);