我正在编写一个多线程c程序来乘以两个矩阵,并使用pthreads和Blas找到行规范。当我将矩阵的维度设置为4并且使用的线程数为2时,我认为我有它工作。然后我改变了线程的数量,它不再起作用。它不会计算错误的答案,但在我尝试加入线程时会卡住
void *matrix_norm(void *arg){
mat_norm_t *thread_mat_norm_data = arg;
int n = thread_mat_norm_data->n;
int i, j;
double norm = 0.;
for(i=0;i<thread_mat_norm_data->sub_n;i++){
double row_sum = 0.;
for(j=0;j<n;j++){
row_sum += *(thread_mat_norm_data->z+i*n+j);
}
if(row_sum>norm){
norm = row_sum;
}
}
pthread_mutex_lock(thread_mat_norm_data->mutex);
if (norm > *(thread_mat_norm_data->global_norm)){
*(thread_mat_norm_data->global_norm)=norm;
}
pthread_mutex_unlock(thread_mat_norm_data->mutex);
pthread_exit(NULL);
}
int main() {
pthread_t *working_thread;
mat_mult_t *thread_mat_mult_data;
mat_norm_t *thread_mat_norm_data;
pthread_mutex_t *mutex;
double *x, *y, *z, norm;
int i, rows_per_thread;
int n = 8;
int num_of_thrds = 4;// Works when this is 2, not when 4
if(n<=num_of_thrds && num_of_thrds < MAXTHRDS){
printf("Matrix dim must be greater than num of thrds\nand num of thrds less than 124.\n");
return (-1);
}
x = malloc(n*n*sizeof(double));
y = malloc(n*n*sizeof(double));
z = malloc(n*n*sizeof(double));
initMat(n, x);
initMat(n, y);
working_thread = malloc(num_of_thrds * sizeof(pthread_t));
thread_mat_mult_data = malloc(num_of_thrds * sizeof(mat_mult_t));
rows_per_thread = n/num_of_thrds;
for(i=0;i<num_of_thrds;i++){
thread_mat_mult_data[i].x = x + i * rows_per_thread * n;
thread_mat_mult_data[i].y = y;
thread_mat_mult_data[i].z = z + i * rows_per_thread * n;
thread_mat_mult_data[i].n = n;
thread_mat_mult_data[i].sub_n =
(i == num_of_thrds-1) ? n-(num_of_thrds-1)*rows_per_thread : rows_per_thread;
pthread_create(&working_thread[i], NULL, matrix_mult, (void *)&thread_mat_mult_data[i]);
}
for(i=0;i<num_of_thrds;i++){
pthread_join(working_thread[i], NULL);
}
free(working_thread);
working_thread = malloc(num_of_thrds * sizeof(pthread_t));
thread_mat_norm_data = malloc(num_of_thrds * sizeof(mat_norm_t));
mutex = malloc(sizeof(pthread_mutex_t));
for(i=0;i<num_of_thrds;i++){
thread_mat_norm_data[i].z = z + i * rows_per_thread * n;
thread_mat_norm_data[i].n = n;
thread_mat_norm_data[i].global_norm = &norm;
thread_mat_norm_data[i].sub_n =
(i == num_of_thrds-1) ? n-(num_of_thrds-1)*rows_per_thread : rows_per_thread;
thread_mat_norm_data[i].mutex = mutex;
pthread_create(&working_thread[i], NULL, matrix_norm, (void *)&thread_mat_norm_data[i]);
}
//Stuck running here
for(i=0;i<num_of_thrds;i++){
pthread_join(working_thread[i], NULL);
}
printMat(n, z , "z");
printf("\nRow Sum Norm = %f\n", norm);
free(x);
free(y);
free(z);
free(working_thread);
free(thread_mat_mult_data);
free(thread_mat_norm_data);
pthread_mutex_destroy(mutex);
free(mutex);
return(0);
}
我不确定为什么它在某些情况下有效,而不是其他情况,任何解释都会很棒!
答案 0 :(得分:1)
忘记使用pthread_mutex_init(mutex, NULL);
初始化互斥锁我仍然不确定为什么它可以解决两个线程但不超过这个?