Question

我正在编写算法的并行化版本来计算Gauss-Hermite求积的权重和横坐标（可以找到其详细信息here或here）。

我有以下算法使用GPU加速来计算值：

template <typename T>
ArrayPair<T> CalculateGaussHermiteWeights( const std::size_t sNumPoints, const T tEps = std::numeric_limits<T>::epsilon() )
{
    const T tPiToQuarter = T( 0.7511255444649425 );

    T tCurrentGuess, tFatherGuess, tGrandfatherGuess;
    std::vector<T> vecInitialGuesses( sNumPoints );
    for( std::size_t s = 0; s < (sNumPoints + 1U) / 2U; ++s )
    {
        if( s == 0 )
        {
            tCurrentGuess = sqrt( T( 2 * sNumPoints + 1 ) ) - T( 1.85575 ) * pow( T( 2 * sNumPoints + 1 ), T( -0.16667 ) );
        }
        else if( s == 1 )
        {
            tFatherGuess = tCurrentGuess;
            tCurrentGuess -= T( 1.14 ) * pow( T( sNumPoints ), T( 0.426 ) ) / tCurrentGuess;
        }
        else if( s == 2 )
        {
            tGrandfatherGuess = tFatherGuess;
            tFatherGuess = tCurrentGuess;
            tCurrentGuess = T( 1.86 ) * tCurrentGuess - T( 0.86 ) * tGrandfatherGuess;
        }
        else if( s == 3 )
        {
            tGrandfatherGuess = tFatherGuess;
            tFatherGuess = tCurrentGuess;
            tCurrentGuess = T( 1.91 ) * tCurrentGuess - T( 0.91 ) * tGrandfatherGuess;
        }
        else
        {
            tGrandfatherGuess = tFatherGuess;
            tFatherGuess = tCurrentGuess;
            tCurrentGuess = T( 2.0 ) * tCurrentGuess - tGrandfatherGuess;
        }

        vecInitialGuesses[s] = tCurrentGuess;
    }

    concurrency::array<T, 1> arrWeights( sNumPoints ), arrAbscissae( sNumPoints, std::begin( vecInitialGuesses ) );
    concurrency::parallel_for_each( arrAbscissae.extent, [=, &arrWeights, &arrAbscissae]( concurrency::index<1> index ) restrict( amp ) {
        T tVal = arrAbscissae[index], tIntermediate;

        std::size_t sNumIterations = 0U;
        T tPolynomial1 = tPiToQuarter, tPolynomial2 = T( 0.0 ), tPolynomial3, tDerivative;
        do {
            for( std::size_t s = 0; s < sNumIterations; ++s )
            {
                tPolynomial3 = tPolynomial2;
                tPolynomial2 = tPolynomial1;
                tPolynomial1 = tVal * concurrency::precise_math::sqrt( T( 2.0 ) / (s + 1U) ) * tPolynomial2 - concurrency::precise_math::sqrt( T( s ) / (s + 1U) ) * tPolynomial3;
            }

            tDerivative = concurrency::precise_math::sqrt( T( 2 * sNumPoints ) ) * tPolynomial2;
            tIntermediate = tVal;
            tVal = tIntermediate - tPolynomial1 / tDerivative;

        } while( concurrency::precise_math::fabs( tVal - tIntermediate ) < tEps && sNumIterations < 10 );

        arrAbscissae[index] = tVal;
        arrAbscissae[concurrency::index<1>( sNumIterations - 1U - index[0] )] = -tVal;

        arrWeights[index] = T( 2.0 ) / (tDerivative * tDerivative);
        arrWeights[concurrency::index<1>( sNumIterations - 1U - index[0] )] = arrWeights[index];
    } );

    return std::make_pair( std::move( arrAbscissae ), std::move( arrWeights ) );
}

但是，如果我调用算法并打印出我得到的横坐标和相应的权重（例如使用n=5），我会得到以下结果：

Abscissa                     Weight
2.02499                      0.0133645
0.958991                     0.363567
0.000186652                  0.958224
-0.957991                    0.363567
-2.02499                     0.0133645

但是，正确的表格是：

2.02018                      0.0199532
0.958572                     0.393619
0                            0.945309
-0.958572                    0.393619
-2.02018                     0.0199532

正如您所看到的，两个值表之间存在（很小但绝对不可忽略的）差异。起初我认为这是由sqrt的不准确造成的，但事实并非如此。然后我猜测它是由于牛顿方法中的迭代次数太少，但是，增加迭代次数没有区别，所以我猜测算法存在缺陷，但是，我找不到它是什么。

提前致谢！

提高Gauss-Hermite权重计算算法的准确性

0 个答案: