一个加速的示例openmp程序

时间:2015-04-13 21:18:28

标签: c parallel-processing openmp

有人能提供一个OpenMP程序,使其与不使用OpenMP时相比有明显的加速吗?我发现很难获得加速,就连下面这个简单的程序在使用OpenMP后也运行得更慢。我的处理器是英特尔®酷睿™ i3-2370M CPU @ 2.40GHz × 4,运行的系统是Linux(Ubuntu 14.10)。

#include <cmath>
#include <stdio.h>
#include <time.h> 
/*
 * Runs four busy-loop iterations inside an OpenMP parallel-for and reports
 * how long they took, both as CPU time and as wall-clock time.
 *
 * clock() reports processor time consumed by the program, which is not the
 * elapsed real time and therefore cannot demonstrate a speedup obtained by
 * running threads concurrently. Elapsed real time is sampled separately with
 * timespec_get() so the two figures can be compared.
 */
int main() {
    const int size = 4;
    struct timespec wall_start, wall_end;

    clock_t cpu_start = clock();
    timespec_get(&wall_start, TIME_UTC);

    #pragma omp parallel for num_threads(4)
    for (int n = 0; n < size; ++n) {
        /* NOTE(review): an optimizing compiler may delete this empty loop,
         * leaving nothing to parallelize; build without optimization or give
         * the loop observable work when benchmarking. */
        for (int j = 0; j < 100000000; j++) {
        }
        printf("\n");
    }

    timespec_get(&wall_end, TIME_UTC);
    clock_t cpu_ticks = clock() - cpu_start;

    /* Combine the seconds and nanoseconds fields into fractional seconds. */
    double wall_seconds = (double)(wall_end.tv_sec - wall_start.tv_sec)
                        + (double)(wall_end.tv_nsec - wall_start.tv_nsec) / 1e9;

    /* clock_t is cast explicitly so the argument matches the %ld specifier. */
    printf("It took me %ld clicks of CPU time (%f CPU seconds, %f wall-clock seconds).\n",
           (long)cpu_ticks, ((double)cpu_ticks) / CLOCKS_PER_SEC, wall_seconds);

    return 0;
}

2 个答案:

答案 0 :(得分:1)

计算积分是一个经典的例子。可以调整常量 parts 来增加执行时间,从而更清楚地观察运行时间的差异:parts 越大,执行时间越长。在单核、双线程的 Intel Pentium 4 上,启用 OpenMP 时耗时 21.3 秒,未启用时耗时 26.7 秒:

#include <math.h>
#include <stdio.h>
#include <omp.h>

/*
 * Integration bounds, number of slices, slice width, and the midpoint of the
 * first slice. The constants are double precision: with roughly 1e9 slices
 * the slice width is far below what single-precision float can resolve near
 * the upper bound.
 */
#define from 0.0
#define to 2.0
#define parts 999999999
#define step ((to - from) / parts)
#define x (from + (step / 2.0))

/*
 * Approximates the definite integral of x^2 + 4 over [from, to] with the
 * midpoint rule and prints the result. The per-slice contributions are
 * summed with an OpenMP reduction.
 */
int main()
{
        double integralSum = 0;
        int i;
        #pragma omp parallel for reduction(+:integralSum)
        /* Slices are numbered 0 .. parts-1 so every midpoint lies inside
         * [from, to]; starting at 1 would shift all samples one slice right. */
        for (i = 0; i < parts; ++i)
        {
                double mid = x + (step * i);
                integralSum = integralSum + (step * ((mid * mid) + 4));
        }

        printf("%f\n", integralSum);

        return 0;
}

它计算 x^2 + 4 从 0 到 2 的定积分。

答案 1 :(得分:1)

我遇到过与此相关的问题:当时我想找出数组中的最大值,也犯了同样的错误——用clock()来测量经过的时间。为了解决这个问题,我改用了clock_gettime(),现在可以正确测量了。

至于可以测量加速比的示例代码(注意,你可能需要更改N的值):

#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>

/*
 * Returns the time elapsed from start to end.
 *
 * The result is deliberately NOT a normalized timespec: tv_nsec carries the
 * whole interval in nanoseconds (the seconds difference is folded in) and
 * tv_sec is always 0, so callers obtain seconds as tv_nsec / 1e9.
 *
 * NOTE(review): where long is 32 bits wide, tv_nsec cannot represent
 * intervals longer than about two seconds.
 */
struct timespec diff(struct timespec start, struct timespec end)
{
    struct timespec temp;

    /* Previously left uninitialized; set it so the returned struct is fully defined. */
    temp.tv_sec = 0;
    /* One formula covers both the same-second and the cross-second case. */
    temp.tv_nsec = (end.tv_sec - start.tv_sec) * 1000000000L
                 + (end.tv_nsec - start.tv_nsec);

    return temp;
}

/*
 * Benchmarks finding the maximum of a random int array with and without an
 * OpenMP max-reduction.
 *
 * For each array size N from 1,000,000 up to (but not including) 100,000,000
 * in steps of 1,000,000, one line is written to out.txt:
 *     N <TAB> parallel seconds <TAB> sequential seconds
 *
 * Returns 0 on success, -1 if out.txt cannot be opened or an array cannot be
 * allocated.
 */
int main()
{
    struct timespec t_start, t_end;

    srand((unsigned int)time(NULL));

    FILE *f = fopen("out.txt", "w");
    if (f == NULL) {
        printf("Could not open output\n");
        return -1;
    }

    for (unsigned int N = 1000000; N < 100000000; N += 1000000) {
        /* N is unsigned, so it is printed with %u. */
        fprintf(f, "%u\t", N);

        int *array = malloc(sizeof *array * N);
        if (array == NULL) {
            printf("Not enough space\n");
            fclose(f); /* do not leak the output file on the error path */
            return -1;
        }
        for (unsigned int i = 0; i < N; i++) {
            array[i] = rand();
        }

        /* rand() never returns a negative value, so 0 is a safe lower bound. */
        int max_val = 0;

        /* Timed section 1: parallel maximum. */
        clock_gettime(CLOCK_MONOTONIC, &t_start);

        #pragma omp parallel for reduction(max:max_val)
        for (unsigned int i = 0; i < N; i++) {
            if (array[i] > max_val) {
                max_val = array[i];
            }
        }

        clock_gettime(CLOCK_MONOTONIC, &t_end);

        /* diff() returns the whole interval in nanoseconds in tv_nsec. */
        fprintf(f, "%lf\t", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        max_val = 0;

        /* Timed section 2: sequential maximum over the same data. */
        /* NOTE(review): max_val is never read after this loop, so an
         * optimizing compiler may be able to drop the work; confirm the
         * build flags used for measurements. */
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        for (unsigned int i = 0; i < N; i++) {
            if (array[i] > max_val) {
                max_val = array[i];
            }
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);

        fprintf(f, "%lf\n", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        free(array);
    }

    fclose(f);

    return 0;
}