您能否建议我如何使openacc更加平行。我正在使用插入排序进行mergesort。我应该使用“循环”或“for”来使用循环。对于插入排序,它应该是内核或并行。
#include <stdlib.h>
#include<stdio.h>
#include <time.h>
#include <openacc.h>
#define THR 1000
//Insertion sort
void isort (int *a, int left, int mid, int right) {
int i,j;
# pragma acc kernels
{
# pragma acc parallel loop num_gangs (1024)
for ( i = mid; i <= right; i++) {
for ( j = i - 1; j >= 0; j--) {
if (a[i] < a [j]) {
int temp = a[j];
a[j] = a[i];
a[i] = temp;
i--;
}
}
}
}
}
void merge(int a[], int left, int right,int left_half[], int right_half[])
{
int i, j, k;
int mid = (left + right + 1) / 2;
i = j = 0;
k = left;
while (i < mid - left && j <= right - mid) {
if (left_half[i] < right_half[j]) {
a[k] = left_half[i];
++i;
} else {
a[k] = right_half[j];
++j;
}
++k;
}
// Copying any leftover elements
#pragma acc data copy(a, right_half)
while (j <= right - mid) {
a[k++] = right_half[j++];//copy remaining elements of the first half
}
#pragma acc data copy(a, left_half)
while (i < mid - left) {
a[k++] = left_half[i++]; //copy remaining elements of the second list
}
}
void mergeSort(int a[], int left, int right)
{
if (left < right) {
int mid = (left + right + 1) / 2;
int left_half[mid - left];
int right_half[right - mid + 1];
int i;
# pragma acc kernels
{
// Copying elements
# pragma acc parallel loop shared (left_half, a)
for (i = left; i < mid; ++i) {
left_half[i - left] = a[i];
}
// Copying elements
# pragma acc parallel loop shared (right_half, a)
for (i = mid; i <= right; ++i) {
right_half[i - mid] = a[i];
}
}
// Recursive call
mergeSort(left_half, 0, mid - left - 1);
mergeSort(right_half, 0, right - mid);
// Merge the two partitions
if ((right - left) > THR){
merge(a, left, right, left_half, right_half);
} else {
isort(a, left,mid, right);
}
}
}
int main()
{
int i, n, *a,c;
printf("Enter the number of elements\n");
scanf("%d",&n);
a = (int *)acc_malloc(sizeof(int) * n);
srand(time(0));
for(i=0;i<n;i++){
a[i]=rand()%1000;
}
printf("\nThe unsorted a is:");
printf("\n");
for(i=0;i<n;i++)
printf("%d ",a[i]);;
mergeSort(a, 0, n-1);
printf("\nSorted a:");
printf("\n");
for(i=0;i<n;i++)
printf("%d ",a[i]);
printf("\n");
}
答案 0 :(得分:0)
我不知道openacc的语法。
从openmp语法开始,如果你有更大的数组循环,你甚至可以并行运行for循环的每个循环,而for循环都是并行的。请查看此link1,link2。我不知道你是否通过写# pragma acc parallel loop above for loops
来表达同样的意思,或者如果你在openacc中有这样的东西,你可以添加它。
你可以同时运行两个mergesorts,就像这样。
# pragma acc kernels
{
# pragma acc parallel{mergeSort(left_half, 0, mid - left - 1);}
# pragma acc parallel{mergeSort(right_half, 0, right - mid);}
}