我在尝试并行化k-means算法的以下功能时遇到了麻烦。 原始代码就是这个:
void PointsToCentroides(int *Punts, int N, int D, int C, double *Centroides, int *PC, int *Sep){
int i, j, k, m;
int dist[C];
for(i=0; i<C; i++)Sep[i]=0;
for(i=0; i<N; i++){
for(j=0; j<C; j++){
dist[j] = 0;
for(k=0; k<D; k++){
dist[j] += (Punts[i*D+k] - Centroides[j*D+k]) * (Punts[i*D+k] - Centroides[j*D+k]);
}
}
m = min(dist, C);
PC[m*N+Sep[m]] = i;
Sep[m]+=1;
}}
我试图将它转换为最后两条指令的关键段并行,但它使程序运行得慢得多。 代码的并行如下:
void PointsToCentroides(int *Punts, int N, int D, int C, double *Centroides, int *PC, int *Sep){
int i, j, k;
for(i=0; i<C; i++)Sep[i]=0;
#pragma omp parallel for shared(PC, Sep)
for(i=0; i<N; i++){
int dist[C], m;
for(j=0; j<C; j++){
dist[j] = 0;
for(k=0; k<D; k++){
dist[j] += (Punts[i*D+k] - Centroides[j*D+k]) * (Punts[i*D+k] - Centroides[j*D+k]);
}
}
m = min(dist, C);
#pragma omp critical
{
PC[m*N+Sep[m]] = i;
Sep[m]+=1;
}
}}
我还尝试创建一个结构,因此每个线程都有自己的数据集来保存它使用的不同点,但是编译器给出了内部错误消息。代码如下:
void PointsToCentroides(int *Punts, int N, int D, int C, double *Centroides, int *PC, int *Sep){
int i;
for(i=0; i<C; i++){
Sep[i] = 0;
}
struct centroids{
int p[N];
int count;
};
struct kt{
struct centroids cent[C];
} ptc[omp_get_max_threads()];
#pragma omp parallel private(i)
{
for(i=0; i<C; i++) ptc[omp_get_thread_num()].cent[i].count = 0;
}
#pragma omp parallel for //shared(PC, Sep)
for(i=0; i<N; i++){
int dist[C], m, j, k;
for(j=0; j<C; j++){
dist[j] = 0;
for(k=0; k<D; k++){
dist[j] += (Punts[i*D+k] - Centroides[j*D+k]) * (Punts[i*D+k] - Centroides[j*D+k]);
}
}
m = min(dist, C);
//PC[m*N+Sep[m]] = i;
//Sep[m]+=1;
ptc[omp_get_thread_num()].cent[m].p[ptc[omp_get_thread_num()].cent[m].count]=i;
ptc[omp_get_thread_num()].cent[m].count ++;
}
#pragma omp parallel private(i)
{
int p = 0;
for(i=0; i<C; i++){
while(p<ptc[omp_get_thread_num()].cent[i].count){
#pragma omp critical
{
PC[i*N+Sep[i]] = ptc[omp_get_thread_num()].cent[i].p[ptc[omp_get_thread_num()].cent[i].count];
Sep[i]++;
}
}
}
}}
编译器提供的错误消息如下:
user@pc:~/Descargas/HG$ gcc -O3 -g hg_k-means.c -fopenmp
hg_k-means.c: En la función ‘PointsToCentroides._omp_fn.0’:
hg_k-means.c:92:10: error interno del compilador: en emit_move_insn, en expr.c:3698
#pragma omp parallel private(i)
^~~
0x75201a emit_move_insn(rtx_def*, rtx_def*)
../.././gcc/expr.c:3697
0x7409f3 extract_bit_field_1
../.././gcc/expmed.c:1748
0x7411f5 extract_bit_field(rtx_def*, unsigned long, unsigned long, int, rtx_def*, machine_mode, machine_mode, bool)
../.././gcc/expmed.c:1932
0x74e433 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
../.././gcc/expr.c:10698
0x758f7a store_expr_with_bounds(tree_node*, rtx_def*, int, bool, bool, tree_node*)
../.././gcc/expr.c:5552
0x759bce expand_assignment(tree_node*, tree_node*, bool)
../.././gcc/expr.c:5321
0x66d1d0 expand_gimple_stmt_1
../.././gcc/cfgexpand.c:3639
0x66d1d0 expand_gimple_stmt
../.././gcc/cfgexpand.c:3737
0x66e79f expand_gimple_basic_block
../.././gcc/cfgexpand.c:5744
0x673906 execute
../.././gcc/cfgexpand.c:6357
Por favor, envíe un informe completo de errores,
con el código preprocesado si es apropiado.
Please include the complete backtrace with any bug report.
Véase <https://gcc.gnu.org/bugs/> para instrucciones.
我对OpenMP并行化方法有点新,所以欢迎任何帮助。