将并行算法分配给前缀和问题有一些问题。我使用openMP进行并行实现。我的代码如下c所示。
结果显示:
seqsum[6] = 28 != parallelsum[6] = 34
请指教。感谢。
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "omp.h"
#include <string.h>
#define N 10 //33554432 // 2 ^ 25
#define NUM_THREADS 4
void computeparallelprefix(int *iplist, int *_pprefixsum, unsigned long size)
{
int nthr, *z, *x = _pprefixsum;
int i, j, tid, work, lo, hi;
#pragma omp parallel shared(nthr,x,z) private(i,j,tid,work,lo,hi)
{
int prev_sum;
memcpy((void *)x, (void *)iplist, sizeof(int)*size);
// Assume nthr = 2^k
#pragma omp single
{
nthr = omp_get_num_threads();
z = malloc(sizeof(int)*nthr);
}
tid = omp_get_thread_num();
work = size /nthr + (i = tid < size%nthr ? 1 : 0);
lo = (size/nthr)*tid + (i==1 ? tid : size%nthr);
hi = lo + work;
if (hi > size)
hi = size;
// local prefix sum over x
for(i=lo+1; i<hi; i++)
x[i] += x[i-1];
// local prefix sum for tid
z[tid] = x[hi-1];
#pragma omp barrier
// global prefix sum over z
for(j=1; j<nthr; j=2*j) {
if (tid >= j)
z[tid] = z[tid] + z[tid-j];
#pragma omp barrier
}
// Update local prefix sum x
prev_sum = z[tid] - x[hi-1];
for(i=lo;i<hi;i++)
x[i] += prev_sum;
}
free(z);
}
void initlist(int *iplist, unsigned long size)
{
int i;
for ( i = 0; i < size; i++)
iplist[i] = i+1;
// iplist[i] = rand() % 13;
}
void printlist(int *list, unsigned long size)
{
int i;
for(i = 0; i < size; i++) {
printf("%d ", list[i]);
}
printf("\n");
}
void computeseqprefixsum(int *iplist, int *seqprefixsum, unsigned long size)
{
int i;
seqprefixsum[0] = iplist[0];
for(i = 1; i < size; i++) {
seqprefixsum[i] = seqprefixsum[i-1] + iplist[i];
}
}
void checkresults(int *seqsum, int *parallelsum, unsigned long size)
{
int i;
for(i = 0; i < size; i++)
{
if(seqsum[i] != parallelsum[i]) {
printf("seqsum[%d] = %d != parallelsum[%d] = %d\n", i, seqsum[i], i,
parallelsum[i]);
exit(1);
}
}
}
int main(int argc, char *argv[])
{
// seed the rand generator
srand(time(NULL));
double seqstart, seqend, parstart, parend, seqtime, partime;
// initialize list
int *iplist, *seqprefixsum, *pprefixsum ;
iplist = (int*) malloc(sizeof(int) * N);
seqprefixsum = (int*) malloc(sizeof(int) * N);
pprefixsum = (int*) malloc(sizeof(int) * N);
if(iplist == NULL || seqprefixsum == NULL || pprefixsum == NULL) {
printf("memory cannot be allocated\n");
exit(1);
}
initlist(iplist, N);
seqstart = omp_get_wtime();
computeseqprefixsum(iplist, seqprefixsum, N);
seqend = omp_get_wtime();
seqtime = seqend - seqstart;
omp_set_num_threads(NUM_THREADS);
parstart = omp_get_wtime();
computeparallelprefix(iplist, pprefixsum, N);
parend= omp_get_wtime();
partime = parend - parstart;
checkresults(seqprefixsum, pprefixsum, N);
printf("Seq Time : %f, Par Time : %f, Speedup : %f\n", seqtime, partime,
seqtime/partime);
free(iplist); free(seqprefixsum); free(pprefixsum);
return 0;
}
答案 0 :(得分:0)
您对代码的前缀和有正确的想法。
我不确定为什么你没有得到正确的结果,但我清理了你的代码,我的版本得到了正确的结果。有关详细信息parallel-cumulative-prefix-sums-in-openmp-communicating-values-between-thread
,请参阅以下问题#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "omp.h"
#define N 11 //33554432 // 2 ^ 25
void computeparallelprefix(int *iplist, int *_pprefixsum, unsigned long size)
{
int nthr, *z, *x = _pprefixsum;
#pragma omp parallel
{
int i;
#pragma omp single
{
nthr = omp_get_num_threads();
z = malloc(sizeof(int)*nthr+1);
z[0] = 0;
}
int tid = omp_get_thread_num();
int sum = 0;
#pragma omp for schedule(static)
for(i=0; i<size; i++) {
sum += iplist[i];
x[i] = sum;
}
z[tid+1] = sum;
#pragma omp barrier
int offset = 0;
for(i=0; i<(tid+1); i++) {
offset += z[i];
}
#pragma omp for schedule(static)
for(i=0; i<size; i++) {
x[i] += offset;
}
}
free(z);
}
int main(void ) {
int *iplist, *pprefixsum ;
iplist = (int*) malloc(sizeof(int) * N);
pprefixsum = (int*) malloc(sizeof(int) * N);
for(int i=0; i<N; i++) iplist[i] = i+1;
for(int i=0; i<N; i++) printf("%d ", iplist[i]); printf("\n");
computeparallelprefix(iplist, pprefixsum, N);
for(int i=0; i<N; i++) printf("%d ", pprefixsum[i]); printf("\n");
for(int i=0; i<N; i++) printf("%d ", (i+1)*(i+2)/2); printf("\n");
return 0;
}