如何正确计算互相关?

时间:2017-02-19 01:44:11

标签: c statistics signal-processing cross-correlation

我写了一个程序,(希望能)计算两个信号之间的互相关。虽然我已经仔细研究过计算方法,但仍有一些重要细节没有弄清楚,尤其是均值的计算。有些算法似乎对每个移位(即延迟)都使用整个数据集的均值来计算相关,换句话说,它们使用恒定的均值。我甚至发现有些算法只计算一次分母,并把它作为常数用于其余所有延迟。但我认为,均值和分母都应当只根据重叠范围内的数据、针对每个延迟重新计算。因此,我为这个程序写了两个版本。在延迟较小时,它们给出的结果非常相似。我想知道哪一个是正确的。

迭代平均值:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* The two input data streams and the result stream; all three are opened by
   open_inputs_and_output(). */
FILE *input1, *input2, *output;
/* n and m count the values found in input1 and input2 respectively;
   t is the number of lag steps that main() computes. */
int m = 0, n = 0, t;
/* VarA and VarB hold the values read from input1 and input2.
   Results and Results2 each hold one correlation value per lag step. */
float *VarA, *VarB, *Results, *Results2;

/* Opens input1, input2 and output, exiting on failure. */
void open_inputs_and_output();

/* Counts the values in both inputs into n and m and exits if they differ. */
void count_and_check_lines();

/* Allocates VarA (n elements) and VarB (m elements). */
void allocate_memory();

/* Fills VarA and VarB from input1 and input2. */
void read_data();

/* Allocates Results and Results2 (t elements each). */
void allocate_memory2();

/* Writes Results2 (as negative lags) and Results to output. */
void write_output();

/*
 * Pearson correlation coefficient between x[0..len-1] and y[0..len-1].
 *
 * Both means are taken over exactly these len samples, so the caller selects
 * the overlap window simply by offsetting the two pointers. Sums are
 * accumulated in double to limit rounding error on long series.
 *
 * len must be at least 1. When either window has zero variance the quotient
 * is 0/0, which evaluates to NaN under IEEE 754 arithmetic.
 */
static float overlap_correlation(const float *x, const float *y, int len)
{
    double sum_x = 0.0, sum_y = 0.0, mean_x, mean_y;
    double dev_x, dev_y, sum_xy = 0.0, sum_xx = 0.0, sum_yy = 0.0;
    int i;

    for(i = 0; i < len; i++)
    {
        sum_x += x[i];
        sum_y += y[i];
    }

    mean_x = sum_x / len;
    mean_y = sum_y / len;

    for(i = 0; i < len; i++)
    {
        dev_x = x[i] - mean_x;
        dev_y = y[i] - mean_y;
        sum_xy += dev_x * dev_y;
        sum_xx += dev_x * dev_x;
        sum_yy += dev_y * dev_y;
    }

    return (float)(sum_xy / sqrt(sum_xx * sum_yy));
}

/*
 * "Iterative mean" cross-correlation driver.
 *
 * Reads both series, asks for the number of lag steps t, and for every
 * delay in [0, t) stores the correlation of the overlapping samples:
 *   Results[delay]  : VarA[delay..n-1]   against VarB[0..n-delay-1]
 *   Results2[delay] : VarA[0..n-delay-1] against VarB[delay..n-1]
 * The means and the denominator are recomputed for each of the two windows.
 *
 * Returns 0 on success; exits with status 1 on invalid lag input.
 */
int main()
{
    int p = 0, delay;

    open_inputs_and_output();

    count_and_check_lines();

    rewind(input1);
    rewind(input2);

    allocate_memory();

    read_data();

    fclose(input1);
    fclose(input2);

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n");

    /* Reject non-numeric input as well as values below -1. */
    if(scanf("%i", &p) != 1 || p < -1)
    {
        printf("Bad number!\n");
        exit(1);
    }
    else if(p == -1)
        t = n;
    else if(p > n)
    {
        /* A delay of n or more leaves no overlapping samples to correlate. */
        printf("Only %i lag steps are available; using %i.\n", n, n);
        t = n;
    }
    else
        t = p;

    allocate_memory2();

    printf("\nWait...\n\n");

    for(delay = 0; delay < t; delay++)
    {
        /* Positive lag: VarA shifted forward by delay relative to VarB. */
        Results[delay] = overlap_correlation(VarA + delay, VarB, n - delay);

        /* Negative lag: the windows are swapped, so the means must be
           recomputed for them rather than reused from the positive lag. */
        Results2[delay] = overlap_correlation(VarA, VarB + delay, n - delay);
    }

    printf("Calculations performed.\n");

    free(VarA);
    free(VarB);

    write_output();

    free(Results);
    free(Results2);

    fclose(output);

    return 0;
}

/* Opens path in the given mode. Prints ok_msg and returns the stream on
   success; otherwise prints fail_msg and exits with status 1. */
static FILE *open_or_exit(const char *path, const char *mode,
                          const char *fail_msg, const char *ok_msg)
{
    FILE *stream = fopen(path, mode);

    if(!stream)
    {
        fputs(fail_msg, stdout);
        exit(1);
    }

    fputs(ok_msg, stdout);
    return stream;
}

/* Opens both input files for reading and the output file for writing,
   storing the streams in input1, input2 and output. The first failure
   terminates the program. */
void open_inputs_and_output()
{
    input1 = open_or_exit("C:\\...\\test.txt", "r",
                          "Error! Could not open input 1.\n",
                          "Input1 opening: OK.\n");

    input2 = open_or_exit("C:\\...\\test2.txt", "r",
                          "Error! Could not open input 2.\n",
                          "Input2 opening: OK.\n");

    output = open_or_exit("C:\\...\\out.txt", "w",
                          "Error! Could not open output.\n",
                          "Output opening: OK.\n");
}

/* Adds to n the number of float values parsed from input1 and to m the
   number parsed from input2, printing both counts. Exits with status 1
   when the two counts differ. */
void count_and_check_lines()
{
    float scratch;

    /* Each successful conversion is one data point; parsing stops at the
       first token that is not a number or at end of file. */
    for(; fscanf(input1, "%f", &scratch) == 1; n++)
        ;

    printf("n = %i\n", n);

    for(; fscanf(input2, "%f", &scratch) == 1; m++)
        ;

    printf("m = %i\n", m);

    if(m == n)
    {
        printf("Number of rows matches.\n");
        return;
    }

    printf("Error: Number of rows does not match!\n");
    exit(1);
}

/* Allocates zero-initialised storage for the two input series:
   n floats for VarA and m floats for VarB. Exits with status 1 when
   either allocation fails. */
void allocate_memory()
{
    VarA = calloc(n, sizeof *VarA);

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarA.\n");
        exit(1);
    }

    VarB = calloc(m, sizeof *VarB);

    /* Check the pointer that was just allocated (the original re-tested
       VarA here, so a failed VarB allocation went unnoticed). */
    if(VarB == NULL)
    {
        printf("Could not allocate memory for VarB.\n");
        exit(1);
    }
}

/* Reads n values from input1 into VarA and m values from input2 into VarB.
   Exits with status 1 if any value cannot be parsed, so the success
   messages are only printed when every element was actually read. */
void read_data()
{
    int i;

    for(i = 0; i < n; i++)
    {
        if(fscanf(input1,"%f",&VarA[i]) != 1)
        {
            printf("Error! Could not read value %i from input 1.\n", i + 1);
            exit(1);
        }
    }

    printf("Data A successfully read.\n");

    for(i = 0; i < m; i++)
    {
        if(fscanf(input2,"%f",&VarB[i]) != 1)
        {
            printf("Error! Could not read value %i from input 2.\n", i + 1);
            exit(1);
        }
    }

    printf("Data B successfully read.\n");
}

/* Allocates the two zero-initialised result arrays, t floats each.
   Exits with status 1 as soon as one allocation fails. */
void allocate_memory2()
{
    Results = calloc(t, sizeof *Results);
    if(!Results)
    {
        printf("Could not allocate memory for Results.\n");
        exit(1);
    }

    Results2 = calloc(t, sizeof *Results2);
    if(!Results2)
    {
        printf("Could not allocate memory for Results2.\n");
        exit(1);
    }
}

/* Writes one "lag value" line per result to output: first Results2 for
   lags t-1 down to 1, printed with a leading minus sign, then Results for
   lags 0 up to t-1. */
void write_output()
{
    int lag = t;

    /* Results2[0] is skipped: lag 0 is written once, from Results. */
    while(--lag > 0)
        fprintf(output, "-%i %f\n", lag, Results2[lag]);

    lag = 0;
    while(lag < t)
    {
        fprintf(output, "%i %f\n", lag, Results[lag]);
        lag++;
    }

    printf("Results written.\n");
}

恒定平均值:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* The two input data streams and the result stream; all three are opened by
   open_inputs_and_output(). */
FILE *input1, *input2, *output;
/* n and m count the values found in input1 and input2 respectively;
   t is the number of lag steps that main() computes. */
int m = 0, n = 0, t;
/* VarA and VarB hold the values read from input1 and input2.
   Results and Results2 each hold one correlation value per lag step. */
float *VarA, *VarB, *Results, *Results2;

/* Opens input1, input2 and output, exiting on failure. */
void open_inputs_and_output();

/* Counts the values in both inputs into n and m and exits if they differ. */
void count_and_check_lines();

/* Allocates VarA (n elements) and VarB (m elements). */
void allocate_memory();

/* Fills VarA and VarB from input1 and input2. */
void read_data();

/* Allocates Results and Results2 (t elements each). */
void allocate_memory2();

/* Writes Results2 (as negative lags) and Results to output. */
void write_output();

/*
 * Normalised correlation of x[0..len-1] against y[0..len-1], with both
 * series centred on caller-supplied means instead of window means.
 *
 * The numerator and both sums of squares are taken over the len overlapping
 * samples only. Sums are accumulated in double to limit rounding error.
 * When either sum of squares is zero the quotient is 0/0, which evaluates
 * to NaN under IEEE 754 arithmetic.
 */
static float centered_correlation(const float *x, double mean_x,
                                  const float *y, double mean_y, int len)
{
    double dev_x, dev_y, sum_xy = 0.0, sum_xx = 0.0, sum_yy = 0.0;
    int i;

    for(i = 0; i < len; i++)
    {
        dev_x = x[i] - mean_x;
        dev_y = y[i] - mean_y;
        sum_xy += dev_x * dev_y;
        sum_xx += dev_x * dev_x;
        sum_yy += dev_y * dev_y;
    }

    return (float)(sum_xy / sqrt(sum_xx * sum_yy));
}

/*
 * "Constant mean" cross-correlation driver.
 *
 * Reads both series, computes the mean of each over all n samples once,
 * asks for the number of lag steps t, and for every delay in [0, t) stores:
 *   Results[delay]  : VarA[delay..n-1]   against VarB[0..n-delay-1]
 *   Results2[delay] : VarA[0..n-delay-1] against VarB[delay..n-1]
 *
 * Returns 0 on success; exits with status 1 on invalid lag input.
 */
int main()
{
    double sum_a = 0.0, sum_b = 0.0, mean_a = 0.0, mean_b = 0.0;
    int p = 0, i, delay;

    open_inputs_and_output();

    count_and_check_lines();

    rewind(input1);
    rewind(input2);

    allocate_memory();

    read_data();

    fclose(input1);
    fclose(input2);

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n");

    /* Reject non-numeric input as well as values below -1. */
    if(scanf("%i", &p) != 1 || p < -1)
    {
        printf("Bad number!\n");
        exit(1);
    }
    else if(p == -1)
        t = n;
    else if(p > n)
    {
        /* A delay of n or more leaves no overlapping samples to correlate. */
        printf("Only %i lag steps are available; using %i.\n", n, n);
        t = n;
    }
    else
        t = p;

    allocate_memory2();

    printf("\nWait...\n\n");

    /* Whole-series means, shared by every lag. Skipped for empty input,
       where t is 0 and the means are never used. */
    if(n > 0)
    {
        for(i = 0; i < n; i++)
        {
            sum_a += VarA[i];
            sum_b += VarB[i];
        }

        mean_a = sum_a / n;
        mean_b = sum_b / n;
    }

    for(delay = 0; delay < t; delay++)
    {
        /* Positive lag: VarA shifted forward by delay relative to VarB. */
        Results[delay] = centered_correlation(VarA + delay, mean_a,
                                              VarB, mean_b, n - delay);

        /* Negative lag: VarB shifted forward by delay relative to VarA. */
        Results2[delay] = centered_correlation(VarA, mean_a,
                                               VarB + delay, mean_b, n - delay);
    }

    printf("Calculations performed.\n");

    free(VarA);
    free(VarB);

    write_output();

    free(Results);
    free(Results2);

    fclose(output);

    return 0;
}

/* Opens both input files for reading and the output file for writing,
   storing the streams in input1, input2 and output. Each successful open
   is reported; the first failure prints an error and exits with status 1. */
void open_inputs_and_output()
{
    input1 = fopen("C:\\...\\test.txt", "r");
    if(!input1)
    {
        printf("Error! Could not open input 1.\n");
        exit(1);
    }
    printf("Input1 opening: OK.\n");

    input2 = fopen("C:\\...\\test2.txt", "r");
    if(!input2)
    {
        printf("Error! Could not open input 2.\n");
        exit(1);
    }
    printf("Input2 opening: OK.\n");

    output = fopen("C:\\...\\out.txt", "w");
    if(!output)
    {
        printf("Error! Could not open output.\n");
        exit(1);
    }
    printf("Output opening: OK.\n");
}

/* Adds to n the number of float values parsed from input1 and to m the
   number parsed from input2, printing both counts. Exits with status 1
   when the two counts differ. */
void count_and_check_lines()
{
    float discarded;

    /* Only the number of successful conversions matters here; the parsed
       value itself is thrown away. */
    for(;;)
    {
        if(fscanf(input1, "%f", &discarded) != 1)
            break;
        n++;
    }

    printf("n = %i\n", n);

    for(;;)
    {
        if(fscanf(input2, "%f", &discarded) != 1)
            break;
        m++;
    }

    printf("m = %i\n", m);

    if(m == n)
    {
        printf("Number of rows matches.\n");
        return;
    }

    printf("Error: Number of rows does not match!\n");
    exit(1);
}

/* Allocates zero-initialised storage for the two input series:
   n floats for VarA and m floats for VarB. Exits with status 1 when
   either allocation fails. */
void allocate_memory()
{
    VarA = calloc(n, sizeof *VarA);

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarA.\n");
        exit(1);
    }

    VarB = calloc(m, sizeof *VarB);

    /* Check the pointer that was just allocated (the original re-tested
       VarA here, so a failed VarB allocation went unnoticed). */
    if(VarB == NULL)
    {
        printf("Could not allocate memory for VarB.\n");
        exit(1);
    }
}

/* Reads n values from input1 into VarA and m values from input2 into VarB.
   Exits with status 1 if any value cannot be parsed, so the success
   messages are only printed when every element was actually read. */
void read_data()
{
    int i;

    for(i = 0; i < n; i++)
    {
        if(fscanf(input1,"%f",&VarA[i]) != 1)
        {
            printf("Error! Could not read value %i from input 1.\n", i + 1);
            exit(1);
        }
    }

    printf("Data A successfully read.\n");

    for(i = 0; i < m; i++)
    {
        if(fscanf(input2,"%f",&VarB[i]) != 1)
        {
            printf("Error! Could not read value %i from input 2.\n", i + 1);
            exit(1);
        }
    }

    printf("Data B successfully read.\n");
}

/* Allocates the two zero-initialised result arrays, t floats each.
   Exits with status 1 as soon as one allocation fails. */
void allocate_memory2()
{
    Results = calloc(t, sizeof *Results);
    if(!Results)
    {
        printf("Could not allocate memory for Results.\n");
        exit(1);
    }

    Results2 = calloc(t, sizeof *Results2);
    if(!Results2)
    {
        printf("Could not allocate memory for Results2.\n");
        exit(1);
    }
}

/* Writes one "lag value" line per result to output: first Results2 for
   lags t-1 down to 1, printed with a leading minus sign, then Results for
   lags 0 up to t-1. */
void write_output()
{
    int lag = t;

    /* Results2[0] is skipped: lag 0 is written once, from Results. */
    while(--lag > 0)
        fprintf(output, "-%i %f\n", lag, Results2[lag]);

    lag = 0;
    while(lag < t)
    {
        fprintf(output, "%i %f\n", lag, Results[lag]);
        lag++;
    }

    printf("Results written.\n");
}

参考文献:

http://www.jot.fm/issues/issue_2010_03/column2.pdf

http://paulbourke.net/miscellaneous/correlate/

1 个答案:

答案 0 :(得分:0)

如果您的过程是广义平稳(wide-sense stationary)的,则其均值不随时间变化。如果这些过程同时还是遍历(ergodic)的,则可以通过对单个时间序列求平均来得到这些均值。在这种情况下,您可以使用所有可用的样本,以尽可能准确地估计均值。这自然就对应您的“恒定平均值”实现。

另一方面,如果您的过程不是广义平稳且遍历的,那么要获得其局部均值的良好估计可能会困难得多。在过程大致平稳的较小时间窗口内估计均值可能是一种合理的方法(类似于“迭代平均值”实现)。请注意,还有其他方法,它们是否适用取决于具体应用和具体信号的性质。

接下来您可能想知道,如何判断您的过程是否是广义平稳的。不幸的是,除非您非常了解这些过程是如何产生的,否则您能做的最好的事情就是先假设该过程是广义平稳的,然后尝试推翻这一假设(通过观察是否出现超出预期范围的结果;参见统计假设检验,statistical hypothesis testing)。