以下 C 代码调用 LAPACK 的 dgesv 例程来计算线性回归。其中 X 为预测变量(自变量)的观测值,Y 为响应变量(因变量)的观测值,二者均保存为 double 类型的数组。我想知道:
1)此代码计算的线性回归是否包含截距项? 2)如何修改代码,使其在是否包含截距项这一点上与 1)的情况相反?
以下是代码:
#include <stdio.h>
/* Number of observations. */
#define N 16
/* Number of predictors. */
#define P 2

/*
 * Prototype for the Fortran routine dgesv_, declared by hand so it can be
 * called from C.  Every argument is passed by address.  Per the LAPACK
 * documentation, DGESV solves A * X = B for a general n-by-n matrix A
 * (a and b are overwritten; info reports success or failure).
 * NOTE(review): assumes the linked LAPACK uses 32-bit Fortran INTEGERs
 * (i.e. C int) -- confirm against the library build.
 */
void dgesv_(int *n, int *nrhs, double *a, int *lda,
            int *ipiv, double *b, int *ldb, int *info);
/*
 * Fits a linear regression of Y on the columns of X by forming the normal
 * equations (X1' X1) beta = X1' Y and solving them with dgesv_.
 *
 * Whether the model has an intercept is decided by the design matrix X1:
 * when `intercept` is 1, column 0 of X1 is all ones, so the first printed
 * coefficient is the intercept and the remaining P are the slopes.  Setting
 * `intercept` to 0 drops that column and fits a regression through the
 * origin (P coefficients, no intercept).
 *
 * Returns 0 on success, 1 if dgesv_ reports a failure.
 */
int main(void){
    /* longley dataset from R: Employed (Y) GNP.deflator and Population (X) */
    double Y[N] = {60.323,61.122,60.171,61.187,63.221,63.639,64.989,63.761,66.019,67.857,68.169,66.513,68.655,69.564,69.331,70.551};
    double X[N][P] =
    {{83,107.608},
    {88.5,108.632},
    {88.2,109.773},
    {89.5,110.929},
    {96.2,112.075},
    {98.1,113.27},
    {99,115.094},
    {100,116.219},
    {101.2,117.388},
    {104.6,118.734},
    {108.4,120.445},
    {110.8,121.95},
    {112.6,123.366},
    {114.2,125.368},
    {115.7,127.852},
    {116.9,130.081}};

    /* 1 = include an intercept (column of ones); 0 = regression through the origin. */
    const int intercept = 1;

    /* n1 = number of unknown coefficients; n2 = number of right-hand sides.
     * The work arrays are sized for the larger (with-intercept) case. */
    int i, j, k, n1 = P + intercept, n2 = 1, ipiv[P+1], info;
    double X1[N][P+1], XtX[(P+1) * (P+1)], XtY[P+1];

    /* design matrix: optional leading column of ones, then the predictors */
    for (i = 0; i < N; i++) {
        if (intercept) {
            X1[i][0] = 1;
        }
        for (j = 0; j < P; j++) {
            X1[i][j + intercept] = X[i][j];
        }
    }

    /* t(X1) %*% X1, stored as a dense n1-by-n1 matrix */
    for (i = 0; i < n1; i++) {
        for (j = 0; j < n1; j++) {
            XtX[i*n1+j] = 0;
            for (k = 0; k < N; k++) {
                XtX[i*n1+j] += X1[k][i] * X1[k][j];
            }
        }
    }

    /* t(X1) %*% Y */
    for (i = 0; i < n1; i++) {
        XtY[i] = 0;
        for (j = 0; j < N; j++) {
            XtY[i] += X1[j][i] * Y[j];
        }
    }

    /* XtX is symmetric, so its row-major and column-major layouts coincide:
     * no transpose is needed before passing it to the Fortran subroutine.
     * On return XtY holds the solution (the regression coefficients). */
    dgesv_(&n1, &n2, XtX, &n1, ipiv, XtY, &n1, &info);
    if (info != 0) {
        /* Per LAPACK: info < 0 flags an illegal argument, info > 0 a singular
         * system.  Either way XtY does not hold a valid solution, so stop
         * instead of printing it. */
        fprintf(stderr, "failure with error %d\n", info);
        return 1;
    }

    /* print beta */
    printf("The regression coefficients: ");
    for (i = 0; i < n1; i++) {
        printf("%f ", XtY[i]);
    }
    printf("\n");
    return 0;
}