有人能提供一个 OpenMP 程序,使其相比不使用 OpenMP 的版本有明显的加速吗?我发现要获得加速非常困难,即使是下面这个简单的程序,启用 OpenMP 后反而运行得更慢。我的处理器是 Intel® Core™ i3-2370M CPU @ 2.40GHz × 4,运行 Linux(Ubuntu 14.10)。
#include <cmath>
#include <stdio.h>
#include <time.h>
/*
 * Runs `size` busy-loop iterations spread over 4 OpenMP threads and reports
 * how long the parallel region took.
 *
 * Two different times are reported on purpose:
 *   - clock() gives the processor time used by the whole program. With
 *     several threads busy at once that figure does not shrink when the work
 *     is parallelized, so on its own it cannot show a speedup.
 *   - timespec_get() gives calendar (wall-clock) time, which is the quantity
 *     that actually gets shorter when the threads run concurrently.
 *
 * Returns 0.
 */
int main() {
    const int size = 4;
    struct timespec wall_start, wall_end;

    clock_t cpu_start = clock();
    timespec_get(&wall_start, TIME_UTC);

#pragma omp parallel for num_threads(4)
    for (int n = 0; n < size; ++n) {
        /* volatile keeps the optimizer from deleting the otherwise empty loop,
         * which would leave nothing to measure. */
        for (volatile int j = 0; j < 100000000; j++) {
        }
        printf("\n");
    }

    timespec_get(&wall_end, TIME_UTC);
    clock_t cpu_ticks = clock() - cpu_start;

    double wall_seconds = (double)(wall_end.tv_sec - wall_start.tv_sec)
                        + (double)(wall_end.tv_nsec - wall_start.tv_nsec) / 1e9;

    /* clock_t has no printf conversion of its own; convert explicitly. */
    printf("It took me %ld clicks (%f seconds of CPU time).\n",
           (long)cpu_ticks, (double)cpu_ticks / CLOCKS_PER_SEC);
    printf("Wall-clock time: %f seconds.\n", wall_seconds);
    return 0;
}
答案 0 :(得分:1)
计算积分是一个经典的例子。可以调整常量 parts 来增加执行时间,从而更清楚地看到运行时间的差异:parts 越大,执行时间越长。在单核、双线程的 Intel Pentium 4 上,启用 OpenMP 时耗时 21.3 秒,未启用时耗时 26.7 秒:
#include <math.h>
#include <stdio.h>
#include <omp.h>
/*
 * Integration bounds and resolution for the midpoint rule used in main().
 * The bounds are double constants so that every sample point is computed in
 * double precision; float bounds would make `step * i` a float product, and a
 * float cannot represent loop indices near `parts` exactly.
 */
#define from 0.0
#define to 2.0
#define parts 999999999
#define step ((to - from) / parts)
/* Midpoint of the first sub-interval. */
#define x (from + (step / 2.0))
/*
 * Approximates the definite integral of x^2 + 4 over [from, to] with the
 * midpoint rule, summing the `parts` slices in parallel through an OpenMP
 * reduction, and prints the result.
 *
 * Returns 0.
 */
int main()
{
    double integralSum = 0;
    int i;
    /* Slice i covers [from + i*step, from + (i+1)*step], so i must run over
     * 0 .. parts-1; starting at 1 would shift the whole window one step to
     * the right of the requested interval. */
#pragma omp parallel for reduction(+:integralSum)
    for (i = 0; i < parts; ++i)
    {
        const double midpoint = x + (step * i);
        /* The integrand is always >= 4, so no absolute value is needed. */
        integralSum = integralSum + (step * (midpoint * midpoint + 4));
    }
    printf("%f\n", integralSum);
    return 0;
}
它计算 x^2 + 4 从 0 到 2 的定积分。
答案 1 :(得分:1)
我遇到过与此相关的问题:当时我想找出数组中的最大值。我犯了同样的错误,即用 clock() 来测量经过的时间。改用 clock_gettime() 之后,问题就解决了。
下面是一个可以测量加速比的示例代码(注意,你可能需要更改 N 的值):
#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
/*
 * Returns the time elapsed from `start` to `end`.
 *
 * The result is NOT a normalized timespec: the entire interval, expressed in
 * nanoseconds, is stored in tv_nsec, and tv_sec is always 0. Callers are
 * expected to read only tv_nsec.
 *
 * The interval in nanoseconds must fit in a long, the type of tv_nsec.
 */
struct timespec diff(struct timespec start, struct timespec end)
{
    struct timespec temp;

    /* Set explicitly so the returned struct never carries an indeterminate
     * member. */
    temp.tv_sec = 0;
    /* When both timestamps fall in the same second the first term is zero,
     * so no special case is required. */
    temp.tv_nsec = ((end.tv_sec - start.tv_sec) * 1000000000L)
                 + end.tv_nsec - start.tv_nsec;
    return temp;
}
/*
 * Benchmarks an OpenMP max-reduction against the equivalent serial loop.
 *
 * For every N from 1000000 up to (but not including) 100000000, in steps of
 * 1000000, the program fills an array of N ints with rand() values, times
 * both searches with CLOCK_MONOTONIC, and writes one line to out.txt:
 *
 *     N <TAB> parallel_seconds <TAB> serial_seconds
 *
 * Returns 0 on success, or -1 if out.txt cannot be opened or closed or if an
 * array cannot be allocated.
 */
int main()
{
    struct timespec t_start, t_end;

    srand((unsigned int)time(NULL));

    FILE *f = fopen("out.txt", "w");
    if (f == NULL)
    {
        printf("Could not open output\n");
        return -1;
    }

    for (unsigned int N = 1000000; N < 100000000; N += 1000000)
    {
        /* N is unsigned, so it needs %u rather than %d. */
        fprintf(f, "%u\t", N);

        int *array = malloc(sizeof *array * N);
        if (array == NULL)
        {
            printf("Not enough space\n");
            fclose(f); /* do not leak the output stream on the error path */
            return -1;
        }
        for (unsigned int i = 0; i < N; i++)
        {
            array[i] = rand();
        }

        /* Parallel search. rand() never returns a negative value, so 0 is a
         * valid starting maximum. */
        int parallel_max = 0;
        clock_gettime(CLOCK_MONOTONIC, &t_start);
#pragma omp parallel for reduction(max:parallel_max)
        for (unsigned int i = 0; i < N; i++)
        {
            if (array[i] > parallel_max) parallel_max = array[i];
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);
        /* diff() returns the whole interval in nanoseconds in tv_nsec. */
        fprintf(f, "%lf\t", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        /* Serial search over the same data. */
        int serial_max = 0;
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        for (unsigned int i = 0; i < N; i++)
        {
            if (array[i] > serial_max) serial_max = array[i];
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);
        fprintf(f, "%lf\n", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        /* Consuming both maxima stops the optimizer from discarding the timed
         * loops as dead code, and doubles as a sanity check. */
        if (parallel_max != serial_max)
        {
            printf("Mismatch for N=%u: parallel max %d, serial max %d\n",
                   N, parallel_max, serial_max);
        }

        free(array);
    }

    /* fclose flushes buffered output; a failure here means lost results. */
    if (fclose(f) != 0)
    {
        printf("Could not close output\n");
        return -1;
    }
    return 0;
}