这个程序:
#include <string>
#include <stdexcept>
// Pair of untyped buffer pointers handed to the stream callback through its
// `void*` user-data argument.
struct buffers_t {
    // Destination of the copy issued in my_callback; main() allocates it
    // with cudaMallocHost.
    void* host_buffer;

    // Source of the copy issued in my_callback; main() allocates it with
    // cudaMalloc.
    void* device_buffer;
};
// Queries the CUDA runtime's last recorded error and, if it is anything other
// than cudaSuccess, throws std::runtime_error.
//
// message: context prefix for the exception text; the thrown what() string is
//          "<message>: <cudaGetErrorString(error)>".
void ensure_no_error(std::string message) {
    auto last_error = cudaGetLastError();
    if (last_error == cudaSuccess) {
        return;  // nothing pending
    }
    // `message` is a by-value copy, so it can be extended in place.
    message += ": ";
    message += cudaGetErrorString(last_error);
    throw std::runtime_error(message);
}
// Stream callback registered by main() via cudaStreamAddCallback.
//
// stream: the stream the callback was enqueued on; the copy below is issued
//         on this same stream.
// status: passed in by the runtime; not inspected here.
// args:   user data pointer; main() passes the address of a buffers_t.
//
// NOTE(review): the cudaStreamAddCallback documentation states that callbacks
// must not make CUDA API calls — confirm against the runtime API reference.
// That restriction would explain the "operation not permitted" error raised
// by the check below.
void my_callback(cudaStream_t stream, cudaError_t status, void* args) {
    auto* copy_args = static_cast<buffers_t*>(args);
    void* destination = copy_args->host_buffer;
    void* source = copy_args->device_buffer;

    // Copy a single byte; cudaMemcpyDefault leaves the direction to the runtime.
    cudaMemcpyAsync(destination, source, 1, cudaMemcpyDefault, stream);

    // Throws std::runtime_error if the call above recorded an error.
    ensure_no_error("after cudaMemcpyAsync");
}
// Reproducer entry point: creates a non-blocking stream, allocates a one-byte
// pinned host buffer and a one-byte device buffer, enqueues my_callback on the
// stream and waits for the stream to drain.
//
// Every setup call is followed by its own ensure_no_error check so that a
// failure is reported at the call that caused it, rather than surfacing later
// under an unrelated label such as "after enqueue callback".
//
// Throws std::runtime_error (via ensure_no_error) on any reported CUDA error.
// NOTE(review): the stream and both buffers are not released before exit.
int main() {
    cudaStream_t stream;
    cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    ensure_no_error("after cudaStreamCreateWithFlags");

    buffers_t buffers;
    cudaMallocHost(&buffers.host_buffer, 1);
    ensure_no_error("after cudaMallocHost");
    cudaMalloc(&buffers.device_buffer, 1);
    ensure_no_error("after cudaMalloc");

    // `buffers` lives on main's stack; the cudaStreamSynchronize call below
    // runs before main returns, so the pointer handed to the callback is
    // still valid when the callback executes.
    cudaStreamAddCallback(stream, my_callback, &buffers, 0);
    ensure_no_error("after enqueue callback");

    cudaStreamSynchronize(stream);
    ensure_no_error("after sync");
}
会产生以下输出:
terminate called after throwing an instance of 'std::runtime_error'
what(): after cudaMemcpyAsync: operation not permitted
Aborted
这有点奇怪,因为 cudaMemcpyAsync 的 API 参考并未将 cudaErrorNotPermitted 列为可能返回的错误之一。在流回调中发起异步拷贝真的有问题吗?
注意:我的机器有GTX 650 Ti(CC 3.0),CUDA 9.0,Linux内核4.8.0,驱动程序384.59。
答案 0 :(得分:2)