我有以下使用OpenMP计算Mandelbrot集的代码。我的串行C代码工作正常,生成的图片完全正确。OpenMP版本可以编译并正常运行,但不幸的是输出的.ppm文件无法打开——Gimp无法读取它。
// mandopenmp.c
// to compile: gcc -fopenmp mandopenmp.c -o mandopenmp -lm
// usage: ./mandopenmp <no_of_iterations> > output.ppm
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <omp.h>
/* One pixel: red, green and blue channel values stored as plain ints. */
typedef struct {
    int r; /* red channel */
    int g; /* green channel */
    int b; /* blue channel */
} rgb;
/*
 * Store one RGB triple in the pixel matrix.
 *
 * m:                pixel matrix, indexed as m[x][y]
 * x, y:             position of the pixel to overwrite
 * red, green, blue: channel values copied into the pixel unchanged
 */
void color(rgb **m, int x, int y, int red, int green, int blue)
{
    rgb *pixel = &m[x][y];

    pixel->r = red;
    pixel->g = green;
    pixel->b = blue;
}
/*
 * Print the PPM header and render the Mandelbrot set into the pixel matrix.
 *
 * Writes the binary-PPM ("P6") header for a 600x400 image with a maximum
 * channel value of 255 to stdout, then fills m[x][y] for every x in [0, 600)
 * and y in [0, 400). The pixel data itself is NOT written here; the caller
 * emits it after this function returns.
 *
 * niterations: maximum number of z = z*z + p iterations per pixel. Pixels
 *              that never leave the radius-2 circle are coloured black.
 * m:           pixel matrix indexed as m[x][y]; must provide at least 600
 *              rows of 400 pixels each. It is written, never read.
 *
 * Rows are distributed over OpenMP threads. Each (x, y) pair is written by
 * exactly one iteration, so the matrix needs no locking.
 */
void mandelbrot(int niterations, rgb **m)
{
    const int w = 600, h = 400;
    const double zoom = 1, moveX = -0.5, moveY = 0; // you can change these to zoom and change position

    printf("P6\n# AUTHOR: Erkan Tairi\n");
    printf("%d %d\n255\n", w, h);

    // Rows take very different amounts of time to compute, hence the dynamic
    // schedule. All per-pixel state is declared inside the loops, which makes
    // it private to each thread without a private() clause.
    #pragma omp parallel for schedule(dynamic, 1)
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            // real and imaginary part of the pixel p, based on the pixel
            // location and the zoom and position values
            const double pr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX;
            const double pi = (y - h / 2) / (0.5 * zoom * h) + moveY;

            // each iteration calculates newz = oldz*oldz + p, where oldz
            // starts at the origin
            double newRe = 0, newIm = 0;
            int i;
            for (i = 0; i < niterations; i++) {
                // remember value of previous iteration
                const double oldRe = newRe;
                const double oldIm = newIm;
                newRe = oldRe * oldRe - oldIm * oldIm + pr;
                newIm = 2 * oldRe * oldIm + pi;
                // if the point is outside the circle with radius 2: stop
                if ((newRe * newRe + newIm * newIm) > 4)
                    break;
            }

            if (i == niterations) {
                color(m, x, y, 0, 0, 0); // black
            } else {
                // normalized iteration count method for proper coloring
                const double z = sqrt(newRe * newRe + newIm * newIm);
                const double scaled = 256. * log2(1.75 + i - log2(log2(z))) / log2((double)niterations);
                int brightness;

                // The smoothed value can land outside 0..255, and it is
                // infinite or NaN when log2(niterations) is zero or undefined
                // (niterations <= 1). Clamp it so the channel fits the
                // declared maximum of 255 and the conversion to int is
                // always defined.
                if (!(scaled >= 0.0))
                    brightness = 0; // negative or NaN
                else if (scaled > 255.0)
                    brightness = 255;
                else
                    brightness = (int)scaled;

                color(m, x, y, brightness, brightness, 255);
            }
        }
    }
}
/*
 * Entry point: parse the iteration count, render the set, and stream the
 * image to stdout as a binary PPM.
 *
 * Usage: ./mandopenmp <no_of_iterations> > output.ppm
 *
 * Diagnostics and the elapsed time go to stderr, because stdout is expected
 * to be redirected into the image file. Returns 0 on success and exits with
 * status 1 on bad arguments, allocation failure or a write error.
 */
int main(int argc, char *argv[])
{
    // Must match the dimensions hard-coded in mandelbrot().
    enum { WIDTH = 600, HEIGHT = 400 };

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <no_of_iterations> > output.ppm\n", argv[0]);
        exit(1);
    }

    int niterations = atoi(argv[1]);
    if (niterations < 1) {
        // atoi() yields 0 for non-numeric input; zero or negative counts
        // cannot produce a meaningful image either.
        fprintf(stderr, "%s: no_of_iterations must be a positive integer\n", argv[0]);
        exit(1);
    }

    // The matrix is indexed m[x][y]: WIDTH columns of HEIGHT pixels each.
    rgb **m = malloc(WIDTH * sizeof *m);
    if (m == NULL) {
        perror("malloc");
        exit(1);
    }
    for (int x = 0; x < WIDTH; x++) {
        m[x] = malloc(HEIGHT * sizeof *m[x]);
        if (m[x] == NULL) {
            perror("malloc");
            exit(1);
        }
    }

    double begin = omp_get_wtime();
    mandelbrot(niterations, m); // also prints the P6 header

    // P6 pixel data is stored row by row (each row left to right), so y has
    // to be the outer loop even though the matrix is indexed m[x][y].
    // Walking the matrix x-major would emit the image column by column and
    // scramble it.
    unsigned char row[WIDTH * 3];
    for (int y = 0; y < HEIGHT; y++) {
        for (int x = 0; x < WIDTH; x++) {
            row[3 * x + 0] = (unsigned char)m[x][y].r;
            row[3 * x + 1] = (unsigned char)m[x][y].g;
            row[3 * x + 2] = (unsigned char)m[x][y].b;
        }
        if (fwrite(row, 1, sizeof row, stdout) != sizeof row) {
            perror("fwrite");
            exit(1);
        }
    }

    double end = omp_get_wtime();
    double time_spent = end - begin;
    fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);

    for (int x = 0; x < WIDTH; x++)
        free(m[x]);
    free(m);
    return 0;
}
答案 0 :(得分:4)
我不了解Mandelbrot集的内部原理,但我会根据您程序的工作流程试着分析一下。
可能是因为您在并行部分中将颜色写入输出文件。这意味着每个像素在其计算完成时就被写出,但这并不保证像素X的计算会在像素X+1的计算之前结束。这样,在写入文件时,您可能会先写入(例如)像素X+1,然后才写入像素X,最终文件中的颜色顺序就被打乱了。
尝试将输出结果写入矩阵。您必须修改color函数,添加两个参数i和j,即要写入的像素的坐标。等整个处理完成、每个像素都计算完毕后,再将矩阵中的像素写入输出文件。
代码:
/* A single pixel with integer red, green and blue channels. */
typedef struct {
    int r; /* red */
    int g; /* green */
    int b; /* blue */
} rgb;
/*
 * Overwrite the pixel at m[x][y] with the given channel values.
 * Nothing is written to any stream; the matrix is the only thing modified.
 */
void color(rgb **m, int x, int y, int red, int green, int blue) {
    m[x][y] = (rgb){ .r = red, .g = green, .b = blue };
}
/*
 * Sketch of the renderer after the suggested change: colours are stored in
 * the matrix m via color() and nothing is written to the output here.
 * Only the signature and the tail of the per-pixel work are shown; the
 * setup and the loops are elided.
 */
void mandelbrot(rgb **m, int niterations) { // note the new argument, m.
// and your code goes on and on... until:
// i, x, y, newRe and newIm come from the elided part of the function.
if ( i == niterations )
color(m, x, y, 0, 0, 0);
else {
// normalized iteration count method for proper coloring
double z = sqrt(newRe * newRe + newIm * newIm);
int brightness = 256. * log2(1.75 + i - log2(log2(z))) / log2((double)niterations);
color(m, x, y, brightness, brightness, 255);
}
// The next two braces presumably close the elided x and y loops.
}
}
}
int main(int argc, char *argv[]) {
// everything ok until...
double begin = omp_get_wtime();
rgb **m;
m = malloc(sizeof(rgb*) * 600);
for ( i = 0; i < 600; i++ ) {
m[i] = malloc(400 * sizeof(rgb));
// finally call mandelbrot!
mandelbrot(m, niterations);
double end = omp_get_wtime();
// now that you have computed your set, you just walk the array writing the output to the file.
for ( i = 0; i < 600; i++ ) {
free(m[i]);
}
free(m);
double time_spent = end - begin;
fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);
return 0;
}
答案 1 :(得分:1)
您的实现存在缺陷。您把许多必须为private的变量声明成了shared,其中包括pr、pi、newRe和newIm。此外,oldRe和oldIm默认也是共享的,因为它们是在并行区域外部的作用域中声明的。这些变量都应该是私有的:
#pragma omp parallel for private(x,i,pr,pi,newRe,newIm,oldRe,oldIm)
parallel for循环的默认调度方式通常(但不一定总是)是static。对于分形这类计算,这并不是最优的,因为图像中每一行或每一列的计算时间各不相同。因此,您应该使用schedule(dynamic,1)子句,并调整块大小(此处为1),直到获得最佳加速比。
#pragma omp parallel for private(x,i,pr,pi,newRe,newIm,oldRe,oldIm) \
schedule(dynamic,1)
答案 2 :(得分:0)
如果您是按顺序写入文件的(就像您在编辑之前的原始代码中那样),则可以在写入文件之前使用ordered编译指示。这样即使使用您的原始代码,图像也会是正确的。请参阅以下链接:
http://bisqwit.iki.fi/story/howto/openmp/#ExampleCalculatingTheMandelbrotFractalInParallel
然而,这不是最佳解决方案。最佳解决方案是先写入内存缓冲区,等mandelbrot代码填充完缓冲区之后再写入文件(就像您的新代码那样)。
我有一些建议可以加快您的代码速度。合并x和y循环(如链接所示)并使用动态调度schedule(dynamic)(链接中也有演示),因为每个像素的计算时间各不相同。最后,使用SSE/AVX一次处理两个(SSE)或四个(AVX)像素。总的来说,结合OpenMP和SSE/AVX,加速比应该可以超过20倍。
#pragma omp ordered {
if(i == niterations)
color(m, x, y, 0, 0, 0); // black - use original color function which writes to file
else
{
// normalized iteration count method for proper coloring
double z = sqrt(newRe * newRe + newIm * newIm);
int brightness = 256. * log2(1.75 + i - log2(log2(z))) / log2((double)niterations);
color(m, x, y, brightness, brightness, 255); //use original color function which writes to file
}
}