我在使用OpenMP时遇到了一些问题。我编写了一些计算代码,并用OpenMP对其进行了并行化,但串行版本和并行版本给出了不同的结果。
这是代码
/*
 * Sequential sweep over the alpha grid.
 *
 * For each alpha read from valpha, every one of the n_sim simulated columns
 * of data2 is processed: c_matrix() and fminsearch() are run on it, theta1 is
 * subtracted from the resulting theta, and the Euclidean norm of the
 * difference is accumulated. The per-alpha figure stored in mses[i] is
 * total_weight * (sum of norms) / n_sim.
 *
 * NOTE(review): i, j, k, sub_b02, b02, data2, theta1, valpha, total_weight,
 * mses and alphas are declared outside this excerpt; their types are assumed
 * from how they are used here.
 * NOTE(review): the value printed as "MSE" averages norms, not squared
 * norms -- confirm that this is intended.
 */
for (i = 0; i < grid_number; i++)
{
    double norm = 0;
    const double alpha = gsl_vector_get(valpha, i);

    for (j = 0; j < n_sim; j++)
    {
        /* Fresh zero-initialised work buffers for this simulation. */
        gsl_matrix_complex *sub_data = gsl_matrix_complex_calloc(n_obs, 1);
        struct cmatrix cm;
        cm.H0 = gsl_matrix_complex_calloc(n_obs - 1, nWeight);
        cm.optMatrix = gsl_matrix_complex_calloc(n_obs - 1, n_obs - 1);

        /* Copy the first three entries of row j of b02 into sub_b02. */
        for (k = 0; k < 3; k++)
        {
            gsl_vector_set(sub_b02, k, gsl_matrix_get(b02, j, k));
        }

        /* Column j of data2 becomes an n_obs x 1 complex column with zero
         * imaginary part. */
        for (k = 0; k < n_obs; k++)
        {
            const gsl_complex z = gsl_complex_rect(gsl_matrix_get(data2, k, j), 0);
            gsl_matrix_complex_set(sub_data, k, 0, z);
        }

        gsl_vector *theta = gsl_vector_calloc(3);

        /* cm is passed by value; c_matrix presumably fills the matrices that
         * cm.H0 and cm.optMatrix point to -- confirm against its definition. */
        c_matrix(sub_b02, sub_data, 1, cm, alpha);
        /* Presumably writes the estimate into theta -- confirm. */
        fminsearch(sub_b02, sub_data, cm.optMatrix, cm.H0, theta);

        /* theta <- theta - theta1, then accumulate its Euclidean norm. */
        gsl_vector_sub(theta, theta1);
        norm += gsl_blas_dnrm2(theta);

        /* These three were allocated with gsl_matrix_complex_calloc, so they
         * must be released with the matching complex free; gsl_matrix_free
         * takes a gsl_matrix * and is the wrong type here. */
        gsl_matrix_complex_free(sub_data);
        gsl_matrix_complex_free(cm.H0);
        gsl_matrix_complex_free(cm.optMatrix);
        gsl_vector_free(theta);
    }

    double mse = total_weight * norm / (double)n_sim;
    printf("alpha:%f, MSE:%.12e\n", alpha, mse);
    mses[i] = mse;
    alphas[i] = alpha;
}
运行此代码,给出以下结果:
alpha:0.000010, MSE:1.368646778831e-01
alpha:0.000076, MSE:1.368646778831e-01
alpha:0.000142, MSE:1.368646778831e-01
alpha:0.000208, MSE:1.368646778831e-01
alpha:0.000274, MSE:1.368646778831e-01
alpha:0.000340, MSE:1.368646778831e-01
alpha:0.000406, MSE:1.368646778831e-01
alpha:0.000472, MSE:1.368646778831e-01
alpha:0.000538, MSE:1.368646778831e-01
alpha:0.000604, MSE:1.368646778831e-01
alpha:0.000670, MSE:1.368646778831e-01
alpha:0.000736, MSE:1.368646778831e-01
alpha:0.000802, MSE:1.368646778831e-01
alpha:0.000868, MSE:1.368646778831e-01
alpha:0.000934, MSE:1.368646778831e-01
然后我尝试使用OpenMP来并行化这段代码:
#pragma omp parallel for private(j,k)
/*
 * Same alpha-grid sweep as the sequential version, run under the
 * "#pragma omp parallel for private(j,k)" directive that precedes it, so
 * iterations over i are distributed across threads.
 * Variables declared inside the loop body (norm, alpha, sub_data, cm, theta,
 * mse) are per-iteration; j and k are listed as private in the directive.
 */
for(i=0; i<grid_number; i++)
{
double norm = 0;
const double alpha = gsl_vector_get(valpha, i);
for(j=0; j<n_sim; j++)
{
/* Fresh zero-initialised work buffers for this simulation. */
gsl_matrix_complex *sub_data = gsl_matrix_complex_calloc(n_obs, 1);
struct cmatrix cm;
cm.H0 = gsl_matrix_complex_calloc(n_obs-1, nWeight);
cm.optMatrix = gsl_matrix_complex_calloc(n_obs-1, n_obs-1);
/* NOTE(review): sub_b02 is not declared inside this loop and is not named
 * in the private clause, so it appears to be shared: every thread writes
 * its own row j into the same vector here, and c_matrix/fminsearch below
 * read it. This looks like a data race -- confirm where sub_b02 is
 * declared and allocate one per thread/iteration if so. */
for(k=0; k<3; k++)
{
gsl_vector_set(sub_b02, k, gsl_matrix_get(b02, j, k));
}
/* Column j of data2 becomes an n_obs x 1 complex column with zero
 * imaginary part. */
for(k=0; k<n_obs; k++)
{
const gsl_complex z = gsl_complex_rect(gsl_matrix_get(data2, k, j), 0);
gsl_matrix_complex_set(sub_data, k, 0, z);
}
gsl_vector* theta = gsl_vector_calloc(3);
/* NOTE(review): c_matrix and fminsearch are not visible here; if either
 * touches global or static state it would also be shared between threads
 * -- verify. */
c_matrix(sub_b02, sub_data, 1, cm, alpha);
fminsearch(sub_b02, sub_data, cm.optMatrix, cm.H0, theta);
/* theta <- theta - theta1, then accumulate its Euclidean norm. */
gsl_vector_sub(theta, theta1);
norm += gsl_blas_dnrm2(theta);
/* NOTE(review): sub_data, cm.H0 and cm.optMatrix come from
 * gsl_matrix_complex_calloc; gsl_matrix_complex_free is the matching
 * release function -- gsl_matrix_free expects a gsl_matrix *. */
gsl_matrix_free(sub_data);
gsl_matrix_free(cm.H0);
gsl_matrix_free(cm.optMatrix);
gsl_vector_free(theta);
}
double mse = total_weight * norm /(double)n_sim;
/* Printed from whichever thread ran iteration i, so line order can differ
 * from the sequential run. */
printf("alpha:%f, MSE:%.12e\n", alpha, mse);
/* Each iteration writes only its own slot i of mses and alphas. */
mses[i] = mse;
alphas[i] = alpha;
}
并行结果:
alpha:0.000934, MSE:1.368646778831e-01
alpha:0.000802, MSE:1.368646778831e-01
alpha:0.000274, MSE:1.368646778831e-01
alpha:0.000670, MSE:1.368646778831e-01
alpha:0.000010, MSE:1.368646778831e-01
alpha:0.000538, MSE:1.368646778831e-01
alpha:0.000406, MSE:1.368646778831e-01
alpha:0.000142, MSE:1.368646778831e-01
alpha:0.000736, MSE:1.368646778831e-01
alpha:0.000604, MSE:1.368646778831e-01
alpha:0.000208, MSE:1.368388509959e-01
alpha:0.000340, MSE:1.368646778831e-01
alpha:0.000472, MSE:1.369194416804e-01
alpha:0.000868, MSE:1.368691005950e-01
alpha:0.000076, MSE:1.369461873652e-01
为什么并行版本中有几个alpha的结果与串行版本不同?
答案 0 :(得分:0)
程序的串行版本和并行版本得到不同的结果,几乎总是意味着一件事:竞态条件(race condition)。就你的情况而言,很难确定具体原因,因为你没有提供一个最小可运行示例(这一点做得不好)。
不过我设法对一些缺失的部分做了逆向推断,我认为你的问题出在变量`sub_b02`上。它在并行块之外定义,因此默认是共享的,但你在循环内对它调用了`gsl_vector_set`,这会使不同的线程写入同一块内存。由于它是一个指针(仅把指针本身设为私有并不会为每个线程复制它所指向的内存),你可能需要在`parallel`块内部分配它。
我不能保证没有其他错误,特别是因为我看不到`c_matrix`和`fminsearch`的实现。不过你应该花一点时间想清楚哪些变量应当在线程间共享、哪些应当是线程私有的,然后在pragma中加上`default(none)`,并显式写出`shared`/`private`子句。这样可以让你更清楚地看到自己遗漏了什么。