在CUDA回调中排队异步拷贝——不允许吗?

时间:2017-11-01 09:14:50

标签: asynchronous cuda cuda-streams

这个程序:

#include <string>
#include <stdexcept>

// Pair of raw buffer pointers handed to the stream callback through its
// opaque user-data argument.
struct buffers_t {
    void* host_buffer;    // host-side buffer; main() allocates it with cudaMallocHost
    void* device_buffer;  // device-side buffer; main() allocates it with cudaMalloc
};

// Throws std::runtime_error if the CUDA runtime has a pending error.
//
// Reads (and thereby clears) the last runtime error via cudaGetLastError().
// On anything other than cudaSuccess the exception text is
// "<message>: <CUDA error string>"; otherwise the function returns normally.
void ensure_no_error(std::string message) {
    const auto last_error = cudaGetLastError();
    if (last_error == cudaSuccess) {
        return;  // nothing pending
    }
    message += ": ";
    message += cudaGetErrorString(last_error);
    throw std::runtime_error(message);
}

// Stream callback: tries to enqueue a 1-byte asynchronous copy from the
// device buffer to the host buffer on the same stream that invoked it.
//
// `args` must point to a buffers_t. The incoming `status` is not inspected.
// NOTE: this cudaMemcpyAsync call is the one under discussion -- the reported
// program output shows it failing with "operation not permitted".
void my_callback(cudaStream_t stream, cudaError_t status, void* args) {
    auto* bufs = static_cast<buffers_t*>(args);
    void* dst = bufs->host_buffer;
    const void* src = bufs->device_buffer;
    cudaMemcpyAsync(dst, src, 1, cudaMemcpyDefault, stream);
    ensure_no_error("after cudaMemcpyAsync");
}

// Reproduction driver: creates a non-blocking stream, allocates one byte of
// host and one byte of device memory, enqueues my_callback on the stream, and
// waits for the stream to drain.
//
// Each runtime call is followed by ensure_no_error() so a failure is reported
// at the call that caused it (ensure_no_error throws std::runtime_error), and
// the stream and both allocations are released before returning.
int main() {
    cudaStream_t stream;
    cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    ensure_no_error("after stream creation");

    buffers_t buffers{};  // value-initialize so both pointers start out null
    cudaMallocHost(&buffers.host_buffer, 1);
    ensure_no_error("after cudaMallocHost");
    cudaMalloc(&buffers.device_buffer, 1);
    ensure_no_error("after cudaMalloc");

    // `buffers` lives in this stack frame; the synchronize below keeps the
    // frame alive until the callback has finished using the pointer.
    cudaStreamAddCallback(stream, my_callback, &buffers, 0);
    ensure_no_error("after enqueue callback");
    cudaStreamSynchronize(stream);
    ensure_no_error("after sync");

    // Release everything acquired above.
    cudaFree(buffers.device_buffer);
    cudaFreeHost(buffers.host_buffer);
    cudaStreamDestroy(stream);
    ensure_no_error("after cleanup");
    return 0;
}

输出如下:

terminate called after throwing an instance of 'std::runtime_error'
  what():  after cudaMemcpyAsync: operation not permitted
Aborted

这有点奇怪,因为cudaMemcpyAsync的API参考并未将cudaErrorNotPermitted列为可能返回的错误之一。在回调中调度异步拷贝真的有问题吗?

注意:我的机器有GTX 650 Ti(CC 3.0),CUDA 9.0,Linux内核4.8.0,驱动程序384.59。

1 个答案:

答案 0 :(得分:2)

  

在回调中调度异步拷贝真的有问题吗?

摘自关于流回调(stream callback)的文档:

  

回调不得(直接或间接地)调用CUDA API,因为一旦进行此类调用,它最终可能会等待自身,从而导致死锁。