我正在试验 Thrust 示例 monte_carlo.cu:
https://github.com/thrust/thrust/blob/master/examples/monte_carlo.cu。
问题出现在下面这段代码中:
float estimate = thrust::transform_reduce(thrust::counting_iterator<int>(0),
thrust::counting_iterator<int>(M),
estimate_pi(),
0.0f,
thrust::plus<float>());
当我将transform_reduce方法的输入序列长度增加到大于M = 87000时,我得到一个错误:
"同步:launch_closure_by_value:未知错误"
在出现错误之前,屏幕黑了几秒钟,然后我在系统托盘中看到一条消息:"NVidia 视频驱动程序已停止响应,并已成功恢复"(我的翻译)。之后我重新启动了计算机,因为它运行不稳定。
当我尝试使用 cuda-memcheck 时,情况发生了变化:在长度 M = 30000 时就已经出现了相同的错误,而不使用 cuda-memcheck 直接运行 .exe 时,程序在该长度下可以正常结束。
以下是cuda-memcheck输出中的几行:
========= Program hit cudaErrorUnknown (error 30) due to "unknown error" on CUDA API call to cudaThreadSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:C:\Windows\system32\nvcuda.dll (cuProfilerStop + 0xc2d92) [0xe06b2]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\cudart64_65.dll (cudaThreadSynchronize + 0xf5) [0x19585]
========= Host Frame:C:\test\Monte_carlo.exe (thrust::system::cuda::detail::synchronize + 0x47) [0x11117]
...
========= Program hit cudaErrorUnknown (error 30) due to "unknown error" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:C:\Windows\system32\nvcuda.dll (cuProfilerStop + 0xc2d92) [0xe06b2]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\cudart64_65.dll (cudaFree + 0xfd) [0x1d28d]
========= Host Frame:C:\test\Monte_carlo.exe (thrust::system::cuda::detail::free > + 0x50) [0x5fa0]
下面是该程序的完整代码。我只对原始示例做了两处改动:用 try-catch 包住 transform_reduce 调用,以及从控制台读取 M。我该如何找出这个错误的原因?
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/functional.h>
#include <thrust/transform_reduce.h>
#include <iostream>
#include <iomanip>
#include <cmath>
// we could vary M & N to find the perf sweet spot
// Integer bit-mixing hash: runs the input through six rounds, each combining
// the current value with a fixed 32-bit constant and a shifted copy of itself
// (add or xor). Callable from both host and device code.
//
// a: value to scramble.
// Returns the scrambled value; unsigned arithmetic wraps, so every input is valid.
__host__ __device__
unsigned int hash(unsigned int a)
{
  unsigned int h = a;

  h = (h + 0x7ed55d16u) + (h << 12);
  h = (h ^ 0xc761c23cu) ^ (h >> 19);
  h = (h + 0x165667b1u) + (h << 5);
  h = (h + 0xd3a2646cu) ^ (h << 9);
  h = (h + 0xfd7046c5u) + (h << 3);
  h = (h ^ 0xb55a4f09u) ^ (h >> 16);

  return h;
}
// Unary functor that turns a thread index into one independent Monte Carlo
// estimate of pi. Each call seeds its own random engine from hash(thread_id),
// draws N points in the unit square and counts how many land inside the
// quarter circle of radius 1.
struct estimate_pi : public thrust::unary_function<unsigned int,float>
{
  // thread_id: index used to derive this run's random seed.
  // Returns 4 * (points inside the quarter circle) / N.
  __host__ __device__
  float operator()(unsigned int thread_id)
  {
    const unsigned int N = 10000; // samples per thread

    // per-thread generator, seeded from the hashed thread index
    thrust::default_random_engine rng(hash(thread_id));

    // maps raw random numbers to [0,1)
    thrust::uniform_real_distribution<float> u01(0,1);

    // number of samples that fell inside the quarter circle; at most N, so
    // the conversion to float below is exact
    unsigned int hits = 0;

    for(unsigned int sample = 0; sample != N; ++sample)
    {
      // draw a point from the unit square (x first, then y)
      const float x = u01(rng);
      const float y = u01(rng);

      // count the point if (x,y) is no farther than 1 from the origin
      if(sqrtf(x*x + y*y) <= 1.0f)
        ++hits;
    }

    // scale by 4 to cover the whole circle, then average over the N samples
    return (4.0f * static_cast<float>(hits)) / N;
  }
};
int main(void)
{
// use 30K independent seeds
int M;
std::cout << "M: ";
std::cin >> M;
try
{
float estimate = thrust::transform_reduce(thrust::counting_iterator<int>(0),
thrust::counting_iterator<int>(M),
estimate_pi(),
0.0f,
thrust::plus<float>());
estimate /= M;
std::cout << "M = " << std::setw(6) << M << " " << std::endl;
std::cout << std::setprecision(6);
std::cout << "pi is approximately " << estimate << std::endl;
}
catch (thrust::system_error &e)
{
// output an error message and exit
std::cerr << "Error: " << e.what() << std::endl;
exit(-1);
}
return 0;
}