选择并分析数组中点的窗口

时间:2010-08-24 10:57:50

标签: c

有人可以告诉我如何解决这个问题吗?

我有一个函数,它对数组中包含的一组点执行简单的回归分析。 我有一个数组(pval),其中包含我想要执行回归分析的所有数据。 这就是我想要实现的目的。

  1. 我得到了数组前7个元素的平均值。这就是我在程序中称之为“ref_avg”的内容。

  2. 我想对数组中每五个元素进行一次回归分析,并把'ref_avg'作为每一组的第一个元素。也就是说,在回归分析的每一步,参与回归的数组中都有6个点。

    例如,对于第一步,下面计算出的ref_avg是70.78。因此,简单回归的第一步将包含这些点:

    1st = {70.78,76.26,69.17,68.68,71.49,73.08},

    第二步将包含ref_avg作为第一个元素,其他元素从原始数组中的第二个元素开始

    2nd = {70.78,69.17,68.68,71.49,73.08,72.99},

    3rd = {70.78,68.68,71.49,73.08,72.99,70.36},

    4th = {70.78,71.49,73.08,72.99,70.36,57.82}依此类推,直至结束。

  3. 回归函数如下所示。

  4. 我不明白为什么在回归的第一步中,'calcul'数组的前3个元素的值是0.00,第二步中有2个元素是0.00,第三步中有1个元素是0.00。另外,回归的最后一步也被打印了3次。

      #include <stdio.h>
      #include <stdlib.h>           
      #include <string.h>   
    
       int main()
    {
    
      float pval[]={76.26,69.17,68.68,71.49,73.08,72.99,70.36,57.82,58.98,69.71,70.43,77.53,80.77,70.30,70.5,70.79,75.58,76.88,80.20,77.69,80.80,70.5,85.27,75.25};
    
    
       int count,Nhour;
       const int MAX_HOUR = 24;
       float *calcul=NULL;
       float *tab_time =NULL;
       float ref_avg;
       int size_hour=7;
       float sum=0;
       int length = Nhour+1;
       float m;
       float b;
       calcul=(float*)calloc(MAX_HOUR,sizeof(calcul));
         if (calcul==NULL) 
        {
            printf(" error in buffer\n");
            exit(EXIT_FAILURE);
        }
    
       tab_time= calloc(MAX_HOUR,sizeof(float));
    
             /* Get the average of the first seven elements */
                int i;
        for (i=0;i<size_hour;i++)
        {
        sum += pval[i];
        }
        ref_avg = sum / size_hour; 
    
              count=0;
            /* perform the regression analysis on 5 hours increment */
    
             while(count<=MAX_HOUR)
             {
              ++count;
               Nhour=5;
    
               int pass = -(Nhour-1);
               int i=0;
    
               for(i=0;i<Nhour+1;i++)  
                 {
                 if(count<MAX_HOUR)
                   {
    
                  calcul[0]=ref_avg;
                  calcul[i] =pval[count+pass];
                  pass++;
                   }
    
         printf("calc=%.2f\n",calcul[i]); // For debug only 
         tab_time[i]=i+1; 
    
                   if(i==Nhour)
                {
    
               linear_regression(tab_time, calcul, length, &m, &b);
               printf("Slope= %.2f\n", m);
    
                }
               }
         }
    
        free(calcul);
        calcul=NULL;
        free(tab_time);
        tab_time=NULL;              
        return 0;
      }
      /*  end of the main function */
    
    
       /* This function is used to calculate the linear 
        regression as it was called above in the main function. 
        It compiles and runs very well, was just included for the 
        compilation and execution of the main function above where I have a problem. */
    
    
        int linear_regression(const float *x,  const float *y, const int n, float *beta1, float *beta0)
        {
    
              float sumx = 0,
            sumy = 0,
            sumx2 = 0,
            sumxy = 0;
    
    int i;
    if (n <= 1) {
        *beta1 = 0;
        *beta0= 0;
        printf("Not enough data for regression \n");
            } 
              else 
                {
        float variance;
    
        for (i = 0; i < n; i++) 
                 {
            sumx += x[i];
            sumy += y[i];
    
            sumx2 += (x[i] * x[i]);
    
            sumxy += (x[i] * y[i]);
         }
        variance = (sumx2 - ((sumx * sumx) / n));
        if ( variance != 0) {
            *beta1 = (sumxy - ((sumx * sumy) / n)) /  variance;
            *beta0 = (sumy - ((*beta1) * sumx)) / n;
        } 
               else  
                     {
            *beta1 = 0;
            *beta0 = 0;
    
             }
    
            }
              return 0;
          }
    

1 个答案:

答案 0 :(得分:0)

我认为下面这段代码可以产生合理的答案。问题中引用的参考平均值似乎是错误的。不需要动态内存分配。 MAX_HOUR的值为24,但数组中只有23个数据值。用于构建待回归数组的索引是错误的,它引用了pval数组中的负索引(因此导致错误的结果)。变量Nhour在初始化之前就被引用;变量length未被正确设置。也没有足够的诊断输出。

main()的函数体在这里被重写了;对linear_regression()的改动则几乎是最小的。代码风格更加一致,并且使用了空白(空格和空行)使其更易于阅读。当剩余数据不足以用5个值填充数组时,此版本就终止回归 - 原问题并不清楚预期的终止条件是什么。

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>           
#include <string.h>   

void linear_regression(const float *x, const float *y, const int n,
                       float *beta1, float *beta0);

int main(void)
{
    float pval[]={
        76.26, 68.68, 71.49, 73.08, 72.99, 70.36, 57.82, 58.98,
        69.71, 70.43, 77.53, 80.77, 70.30, 70.50, 70.79, 75.58,
        76.88, 80.20, 77.69, 80.80, 70.50, 85.27, 75.25,
        };
    const int Nhour = 5;
    const int MAX_HOUR = sizeof(pval)/sizeof(pval[0]);
    const int size_hour = 7;
    float ref_avg;
    float sum = 0.0;
    float m;
    float b;
    float calc_y[6];
    float calc_x[6];

    /* Get the average of the first seven elements */
    for (int i = 0; i < size_hour; i++)
        sum += pval[i];
    ref_avg = sum / size_hour; 
    printf("ref avg = %5.2f\n", ref_avg); // JL

    /* perform the regression analysis on 5 hours increment */
    for (int pass = 0; pass <= MAX_HOUR - Nhour; pass++) // JL
    {
        calc_y[0] = ref_avg;
        calc_x[0] = pass + 1;
        printf("pass %d\ncalc_y[0] = %5.2f, calc_x[0] = %5.2f\n",
               pass, calc_y[0], calc_x[0]);
        for (int i = 1; i <= Nhour; i++)  
        {
            int n = pass + i - 1;
            calc_y[i] = pval[n];
            calc_x[i] = pass + i + 1; 
            printf("calc_y[%d] = %5.2f, calc_x[%d] = %5.2f, n = %2d\n",
                   i, calc_y[i], i, calc_x[i], n);
        }

        linear_regression(calc_x, calc_y, Nhour+1, &m, &b);
        printf("Slope= %5.2f, intercept = %5.2f\n", m, b);
    }

    return 0;
}

void linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
    float sumx1 = 0.0;
    float sumy1 = 0.0;
    float sumx2 = 0.0;
    float sumxy = 0.0;

    assert(n > 1);

    for (int i = 0; i < n; i++) 
    {
        sumx1 += x[i];
        sumy1 += y[i];
        sumx2 += (x[i] * x[i]);
        sumxy += (x[i] * y[i]);
    }
    float variance = (sumx2 - ((sumx1 * sumx1) / n));
    if (variance != 0.0)
    {
        *beta1 = (sumxy - ((sumx1 * sumy1) / n)) /  variance;
        *beta0 = (sumy1 - ((*beta1) * sumx1)) / n;
    } 
    else  
    {
        *beta1 = 0.0;
        *beta0 = 0.0;
    }
}