C++ 中的 Anderson-Darling 检验

时间:2017-02-28 16:53:52

标签: c++ statistics

我正在尝试实现此处(here)介绍的 Anderson-Darling 检验。我按照维基百科上的步骤操作,并用 MATLAB 核对了测试数据(记为 X)的均值和标准差,确认计算无误。另外,我使用了一个名为 phi 的函数来计算标准正态分布的 CDF,也测试过这个函数,确保它是正确的。现在的问题出在实际计算 A 平方这一步(维基百科中记为 A²,我在 C++ 代码中用变量 A 表示)。

这是我为Anderson-Darling测试所做的功能:

// Sorts X in place, standardizes it with its own mean and variance, and prints
// the accumulated value A to cout.
//   n : number of elements in X
//   X : sample values; reordered by the sort below
// NOTE(review): this is the version the question reports as printing -4e+006;
// the review notes below mark what the visible code does at each suspect line.
void Anderson_Darling(int n, double X[]){
    sort(X,X + n);
    // Find the mean of X
    double X_avg = 0.0;
    double sum = 0.0;
    for(int i = 0; i < n; i++){
        sum += X[i];
    }
    X_avg = ((double)sum)/n;


    // Find the variance of X
    // (divides by n here, i.e. the population form of the variance)
    double X_sig = 0.0;
    for(int i = 0; i < n; i++){
        X_sig += (X[i] - X_avg)*(X[i] - X_avg);
    }
    X_sig /= n;


    // The values X_i are standardized to create new values Y_i
    // NOTE(review): Y[n] with a runtime n is a variable-length array, which is
    // a compiler extension rather than standard C++.
    double Y[n];
    for(int i = 0; i < n; i++){
        Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
        //cout << Y[i] << endl;
    }

    // With a standard normal CDF, we calculate the Anderson_Darling Statistic
    // NOTE(review), all visible in the statement inside the loop:
    //  - `1/n` divides two ints, so it evaluates to 0 for every n > 1 and the
    //    whole log term is multiplied by zero;
    //  - `-n` is added on every iteration, so the loop accumulates -n*n
    //    (for n = 2000 that is -4e+006, the value reported in the question);
    //  - with i starting at 0, `2*(i) - 1` starts at -1 and `Y[n+1 - i]`
    //    indexes Y[n+1] and Y[n], both past the end of the n-element array.
    double A = 0.0;
    for(int i = 0; i < n; i++){
        A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
    }
    cout << A << endl;
} 

注意,我知道 Anderson-Darling 统计量(A 平方)公式中的 i 是从 1 到 n,而 C++ 数组下标从 0 开始;但无论我是否相应地调整下标,得到的结果都一样。

我在C ++中获得的值是:

-4e+006

在MATLAB中收到的值应该是:

0.2330

非常感谢任何建议。

以下是我的全部代码:

#include <iostream>
#include <math.h>
#include <cmath>
#include <random>
#include <algorithm>
#include <chrono>

using namespace std;

double *Box_Muller(int n, double u[]);
double *Beasley_Springer_Moro(int n, double u[]);
void Anderson_Darling(int n, double X[]);
double phi(double x);

/**
 * Program entry point.
 *
 * Fills an array with n uniform variates drawn from a Mersenne Twister with a
 * fixed seed, converts them with Box_Muller, and passes the result to
 * Anderson_Darling, which prints the statistic.
 *
 * @return 0 on completion.
 */
int main(){
    // const makes Mersenne an ordinary fixed-size array instead of a
    // variable-length array (a compiler extension).
    const int n = 2000;
    double Mersenne[n];
    std::mt19937 e2(1);  // fixed seed, so every run sees the same sequence
    std::uniform_real_distribution<double> dist(0, 1);
    for(int i = 0; i < n; i++){
        Mersenne[i] = dist(e2);
    }

    // Print Anderson Statistic for Mersenne 6a
    // Box_Muller hands back a new[]-allocated buffer; take ownership of it
    // directly (allocating a second buffer first would just leak it).
    double *result = Box_Muller(n,Mersenne);
    Anderson_Darling(n,result);
    delete[] result;

    return 0;
}

/**
 * Box-Muller transform (cosine branch only).
 *
 * Output element i is sqrt(-2 ln u[i]) * cos(2*pi*u[j]), where j = i + 1 for
 * every element except the last; for the last element j wraps around to 0 so
 * that no read goes past the end of u.
 *
 * @param n  number of elements in u and in the returned array
 * @param u  n uniform variates. u[i] == 0 makes log(u[i]) -infinity, so the
 *           corresponding output is not finite; callers that can produce an
 *           exact 0 should filter it out beforehand.
 * @return   array of n doubles allocated with new[] (the caller must
 *           delete[] it), or nullptr when n <= 0.
 */
double *Box_Muller(int n, double u[]){
    if(n <= 0){
        return nullptr;
    }
    // 2*pi spelled out so the function does not depend on the non-standard M_PI macro.
    const double two_pi = 6.283185307179586476925286766559;

    double *X = new double[n];
    for(int i = 0; i < n; i++){
        // Partner index for the angle; wraps on the final element.
        const int j = (i + 1 == n) ? 0 : i + 1;
        X[i] = std::sqrt(-2.0*std::log(u[i]))*std::cos(two_pi*u[j]);
    }
    return X;
}

/**
 * Beasley-Springer-Moro approximation of the inverse standard normal CDF,
 * applied element-wise.
 *
 * For |u - 0.5| < 0.42 a rational approximation in (u - 0.5) is used; in the
 * tails a degree-8 polynomial in log(-log(r)) is used, with r = u for the
 * lower tail and r = 1 - u for the upper tail, and the sign is flipped for
 * the lower tail.
 *
 * @param n  number of elements in u and in the returned array
 * @param u  n probabilities. u[i] == 0 yields -infinity and u[i] == 1 yields
 *           +infinity, because log(-log(0)) is infinite.
 * @return   array of n doubles allocated with new[] (the caller must
 *           delete[] it), element i being the quantile for u[i]; nullptr
 *           when n <= 0.
 */
double *Beasley_Springer_Moro(int n, double u[]){
    if(n <= 0){
        return nullptr;
    }

    // Constants needed for algo
    const double a_0 = 2.50662823884;     const double b_0 = -8.47351093090;
    const double a_1 = -18.61500062529;   const double b_1 = 23.08336743743;
    const double a_2 = 41.39119773534;    const double b_2 = -21.06224101826;
    const double a_3 = -25.44106049637;   const double b_3 = 3.13082909833;

    const double c_0 = 0.3374754822726147; const double c_5 = 0.0003951896511919;
    const double c_1 = 0.9761690190917186; const double c_6 = 0.0000321767881768;
    const double c_2 = 0.1607979714918209; const double c_7 = 0.0000002888167364;
    const double c_3 = 0.0276438810333863; const double c_8 = 0.0000003960315187;
    const double c_4 = 0.0038405729373609;

    // Array form of new: n elements (new double(n) would allocate a single
    // double whose value is n).
    double *x = new double[n];

    for(int i = 0; i < n; i++){
        const double y = u[i] - 0.5;
        if(std::fabs(y) < 0.42){
            // Central region: rational approximation in y.
            const double r = y*y;
            x[i] = y*(((a_3*r + a_2)*r + a_1)*r + a_0)
                 / ((((b_3*r + b_2)*r + b_1)*r + b_0)*r + 1.0);
        }else{
            // Tail region: evaluate the polynomial on the distance to the
            // nearer end of [0, 1], then mirror for the lower tail.
            double r = (y > 0.0) ? 1.0 - u[i] : u[i];
            r = std::log(-std::log(r));
            const double tail = c_0 + r*(c_1 + r*(c_2 + r*(c_3 + r*(c_4
                              + r*(c_5 + r*(c_6 + r*(c_7 + r*c_8)))))));
            x[i] = (y < 0.0) ? -tail : tail;
        }
    }
    return x;
}

    // Evaluates 0.5 * erfc(-x / sqrt(2)) for the given x.
    double phi(double x){
        const double scaled_arg = -x * M_SQRT1_2;
        const double complementary = std::erfc(scaled_arg);
        return 0.5 * complementary;
    }


/**
 * Prints the Anderson-Darling A-squared statistic of X, tested against a
 * normal distribution whose mean and variance are estimated from X itself.
 *
 * With the sorted, standardized values Y_1 <= ... <= Y_n (1-based), the
 * printed value is
 *     A = -n - (1/n) * sum_{i=1..n} (2i - 1) * [ln phi(Y_i) + ln(1 - phi(Y_{n+1-i}))]
 * where phi is the function of that name declared in this file.
 *
 * @param n  number of elements in X; must be at least 2, because the sample
 *           variance divides by n - 1
 * @param X  sample values; sorted in place as a side effect
 *
 * Writes the statistic followed by a newline to cout. For n < 2 nothing is
 * written to cout and a message goes to cerr instead. If all values in X are
 * equal the variance is 0 and the printed value is not meaningful.
 */
void Anderson_Darling(int n, double X[]){
    if(n < 2){
        std::cerr << "Anderson_Darling: need at least 2 observations" << std::endl;
        return;
    }

    std::sort(X, X + n);

    // Find the mean of X
    double sum = 0.0;
    for(int i = 0; i < n; i++){
        sum += X[i];
    }
    const double X_avg = sum/n;

    // Find the (sample) variance of X
    double X_sig = 0.0;
    for(int i = 0; i < n; i++){
        X_sig += (X[i] - X_avg)*(X[i] - X_avg);
    }
    X_sig /= (n-1);
    const double X_sd = std::sqrt(X_sig);

    // With a standard normal CDF, we calculate the Anderson_Darling Statistic.
    // Values are standardized on the fly, so no temporary array is needed.
    double A = -n;
    for(int i = 0; i < n; i++){
        // 0-based i pairs element i with element n-1-i (1-based: i and n+1-i).
        const double y_low  = (X[i] - X_avg)/X_sd;
        const double y_high = (X[n-1-i] - X_avg)/X_sd;
        // phi(-y) equals 1 - phi(y) but keeps precision when phi(y) is close to 1.
        A += -1.0/(double)n *(2*(i+1) - 1)*(std::log(phi(y_low)) + std::log(phi(-y_high)));
    }
    std::cout << A << std::endl;
}

2 个答案:

答案 0 :(得分:4)

让我猜一下,你的 n 是 2000,对吗?这里的主要问题是你在最后一个表达式中写了 1/n。1 是 int,n 也是 int,所以 1 除以 n 执行的是整数除法。在整数除法下,1 除以任何大于 1 的数结果都是 0(可以理解为只保留商的整数部分)。你需要把 n 转换为 double,即写成 1/(double)n。

其余部分应该都可以正常工作。

讨论摘要 -

  1. Y []的索引应分别为i和n-1-i。
  2. n不应该在循环中添加但只能添加一次。
  3. 小修正,例如在计算方差时将除数更改为n而不是n-1。

答案 1 :(得分:2)

这里有整数除法:

A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
          ^^^
1/n

当 n > 1 时,1/n 的结果为零——你需要把它改成浮点除法,例如:1.0/n

A += -n - 1.0/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
          ^^^^^