我有一个带有生产者线程和OpenGL线程的GUI应用程序,OpenGL线程需要调用CUDA函数,生产者需要调用cudaMemcpy
等。
无论我做什么,我似乎无法让CUDA驱动程序api工作。每当我尝试使用这些功能时,我都会得到cudaErrorMissingConfiguration
。
我想使用多线程CUDA,实现这一目标的范式是什么?
原始
void program::initCuda()
{
CUresult a;pctx=0;
cudaSafeCall(cudaSetDevice(0));
cudaSafeCall(cudaGLSetGLDevice(0));
a=cuInit(0);
cudaSafeCall(cudaFree(0));
cout <<"cuInit :" <<a << endl;assert(a == cudaSuccess);
//a=cuCtxGetCurrent(pctx);
a=cuCtxCreate(pctx,CU_CTX_SCHED_AUTO,0);
cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
//Fails with cudaErrorMissingConfiguration
a=cuCtxPopCurrent(pctx);
cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
cout <<"Initialized CUDA" << endl;
}
修
void glStream::initCuda()
{
CUresult a;
pctx=0;
cudaSafeCall(cudaSetDevice(0));
cudaSafeCall(cudaGLSetGLDevice(0));
cudaFree(0);// From post http://stackoverflow.com/questions/10415204/how-to-create-a-cuda-context seems to indicate that `cudaSetDevice` should make a context.
a=cuCtxGetCurrent(pctx);
cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
a=cuCtxPopCurrent(pctx);
cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
cout <<"Initialized CUDA" << endl;
}
答案 0 :(得分:3)
第二个代码的最简单版本应如下所示:
#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
int main(void)
{
CUresult a;
CUcontext pctx;
cudaSetDevice(0); // runtime API creates context here
a = cuCtxGetCurrent(&pctx);
std::cout << "GetContext : " << a << std::endl;
assert(a == CUDA_SUCCESS);
a = cuCtxPopCurrent(&pctx);
std::cout << "cuCtxPopCurrent : " << a << std::endl;
assert(a == CUDA_SUCCESS);
std::cout << "Initialized CUDA" << std::endl;
return 0;
}
在OS X 10.6上使用CUDA 5.0产生以下内容:
$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
GetContext :0
cuCtxPopCurrent :0
Initialized CUDA
即。 “只是有效”。这里的上下文是由cudaSetDevice
调用延迟启动的(注意我错误地断言cudaSetDevice
没有建立上下文,但至少在CUDA 5中看起来如此。当运行时这种行为可能已经改变API在CUDA 4)中进行了修订。
或者,您可以使用驱动程序API来启动上下文:
#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
int main(void)
{
CUresult a;
CUcontext pctx;
CUdevice device;
cuInit(0);
cuDeviceGet(&device, 0);
std::cout << "DeviceGet : " << a << std::endl;
cuCtxCreate(&pctx, CU_CTX_SCHED_AUTO, device ); // explicit context here
std::cout << "CtxCreate : " << a << std::endl;
assert(a == CUDA_SUCCESS);
a = cuCtxPopCurrent(&pctx);
std::cout << "cuCtxPopCurrent : " << a << std::endl;
assert(a == CUDA_SUCCESS);
std::cout << "Initialized CUDA" << std::endl;
return 0;
}
也“正常”:
$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
DeviceGet : 0
CtxCreate : 0
cuCtxPopCurrent : 0
Initialized CUDA
你不应该做的就是在你的第一个例子中混合两者。我所能建议的是尝试这两种方法并确认它们适合您,然后采用呼叫序列来实现您实际想要实现的目标。