我正在编写算法的并行化版本来计算Gauss-Hermite求积的权重和横坐标(可以找到其详细信息here或here)。
我有以下算法使用GPU加速来计算值:
template <typename T>
ArrayPair<T> CalculateGaussHermiteWeights( const std::size_t sNumPoints, const T tEps = std::numeric_limits<T>::epsilon() )
{
const T tPiToQuarter = T( 0.7511255444649425 );
T tCurrentGuess, tFatherGuess, tGrandfatherGuess;
std::vector<T> vecInitialGuesses( sNumPoints );
for( std::size_t s = 0; s < (sNumPoints + 1U) / 2U; ++s )
{
if( s == 0 )
{
tCurrentGuess = sqrt( T( 2 * sNumPoints + 1 ) ) - T( 1.85575 ) * pow( T( 2 * sNumPoints + 1 ), T( -0.16667 ) );
}
else if( s == 1 )
{
tFatherGuess = tCurrentGuess;
tCurrentGuess -= T( 1.14 ) * pow( T( sNumPoints ), T( 0.426 ) ) / tCurrentGuess;
}
else if( s == 2 )
{
tGrandfatherGuess = tFatherGuess;
tFatherGuess = tCurrentGuess;
tCurrentGuess = T( 1.86 ) * tCurrentGuess - T( 0.86 ) * tGrandfatherGuess;
}
else if( s == 3 )
{
tGrandfatherGuess = tFatherGuess;
tFatherGuess = tCurrentGuess;
tCurrentGuess = T( 1.91 ) * tCurrentGuess - T( 0.91 ) * tGrandfatherGuess;
}
else
{
tGrandfatherGuess = tFatherGuess;
tFatherGuess = tCurrentGuess;
tCurrentGuess = T( 2.0 ) * tCurrentGuess - tGrandfatherGuess;
}
vecInitialGuesses[s] = tCurrentGuess;
}
concurrency::array<T, 1> arrWeights( sNumPoints ), arrAbscissae( sNumPoints, std::begin( vecInitialGuesses ) );
concurrency::parallel_for_each( arrAbscissae.extent, [=, &arrWeights, &arrAbscissae]( concurrency::index<1> index ) restrict( amp ) {
T tVal = arrAbscissae[index], tIntermediate;
std::size_t sNumIterations = 0U;
T tPolynomial1 = tPiToQuarter, tPolynomial2 = T( 0.0 ), tPolynomial3, tDerivative;
do {
for( std::size_t s = 0; s < sNumIterations; ++s )
{
tPolynomial3 = tPolynomial2;
tPolynomial2 = tPolynomial1;
tPolynomial1 = tVal * concurrency::precise_math::sqrt( T( 2.0 ) / (s + 1U) ) * tPolynomial2 - concurrency::precise_math::sqrt( T( s ) / (s + 1U) ) * tPolynomial3;
}
tDerivative = concurrency::precise_math::sqrt( T( 2 * sNumPoints ) ) * tPolynomial2;
tIntermediate = tVal;
tVal = tIntermediate - tPolynomial1 / tDerivative;
} while( concurrency::precise_math::fabs( tVal - tIntermediate ) < tEps && sNumIterations < 10 );
arrAbscissae[index] = tVal;
arrAbscissae[concurrency::index<1>( sNumIterations - 1U - index[0] )] = -tVal;
arrWeights[index] = T( 2.0 ) / (tDerivative * tDerivative);
arrWeights[concurrency::index<1>( sNumIterations - 1U - index[0] )] = arrWeights[index];
} );
return std::make_pair( std::move( arrAbscissae ), std::move( arrWeights ) );
}
但是,如果我调用算法并打印出我得到的横坐标和相应的权重(例如使用n=5
),我会得到以下结果:
Abscissa Weight
2.02499 0.0133645
0.958991 0.363567
0.000186652 0.958224
-0.957991 0.363567
-2.02499 0.0133645
但是,正确的表格是:
2.02018 0.0199532
0.958572 0.393619
0 0.945309
-0.958572 0.393619
-2.02018 0.0199532
正如您所看到的,两个值表之间存在(很小但绝对不可忽略的)差异。起初我认为这是由sqrt
的不准确造成的,但事实并非如此。然后我猜测它是由于牛顿方法中的迭代次数太少,但是,增加迭代次数没有区别,所以我猜测算法存在缺陷,但是,我找不到它是什么。
提前致谢!