I want to parallelize a ray-tracing algorithm that contains two for loops using OpenMP.
Besides setting omp_set_num_threads(omp_get_max_threads())
and putting #pragma omp parallel for
in front of the first for loop, is there anything else I can do?
So far I have gotten a 2.13x speedup.
Code:
start = omp_get_wtime();
#pragma omp parallel for
for (int i = 0; i < (viewport.xvmax - viewport.xvmin); i++)
{
for (int j = 0; j < (viewport.yvmax - viewport.yvmin); j++)
{
int intersection_object = -1; // none
int reflected_intersection_object = -1; // none
double current_lambda = 0x7fefffffffffffff; // maximum positive double
double current_reflected_lambda = 0x7fefffffffffffff; // maximum positive double
RAY ray, shadow_ray, reflected_ray;
PIXEL pixel;
SPHERE_INTERSECTION intersection, current_intersection, shadow_ray_intersection, reflected_ray_intersection, current_reflected_intersection;
double red, green, blue;
double theta, reflected_theta;
bool bShadow = false;
pixel.i = i;
pixel.j = j;
// 1. compute ray:
compute_ray(&ray, &view_point, &viewport, &pixel, &camera_frame, focal_distance);
// 2. check if ray hits an object:
for (int k = 0; k < NSPHERES; k++)
{
if (sphere_intersection(&ray, &sphere[k], &intersection))
{
// there is an intersection between ray and object
// 1. compute the normal...
intersection_normal(&sphere[k], &intersection, &ray);
// 2. if the intersection lambda is smaller than the current one:
if (intersection.lambda_in < current_lambda)
{
current_lambda = intersection.lambda_in;
intersection_object = k;
copy_intersection_struct(¤t_intersection, &intersection);
}
// compute the current lambda: current_lambda =
// mark which object is the current one: intersection_object =
// copy the intersection struct: copy_intersection_struct();
}
}
// Compute the color of the pixel:
if (intersection_object > -1)
{
compute_shadow_ray(&shadow_ray, &intersection, &light);
theta = dotproduct(&(shadow_ray.direction), &(intersection.normal));
for (int l = 0; l<NSPHERES; l++)
{
if (l != intersection_object)
{
if (sphere_intersection(&shadow_ray, &sphere[l], &shadow_ray_intersection) && (theta>0.0))
bShadow = true;
}
}
if (bShadow)
{ // if in shadow, add only ambient light to the surface color
red = shadow(sphere[intersection_object].ka_rgb[CRED], ambi_light_intensity);
green = shadow(sphere[intersection_object].ka_rgb[CGREEN], ambi_light_intensity);
blue = shadow(sphere[intersection_object].ka_rgb[CBLUE], ambi_light_intensity);
}
else
{
// the intersection is not in shadow:
red = blinnphong_shading(¤t_intersection, &light, &view_point,
sphere[intersection_object].kd_rgb[CRED], sphere[intersection_object].ks_rgb[CRED], sphere[intersection_object].ka_rgb[CRED], sphere[intersection_object].shininess,
light_intensity, ambi_light_intensity);
green = blinnphong_shading(¤t_intersection, &light, &view_point,
sphere[intersection_object].kd_rgb[CGREEN], sphere[intersection_object].ks_rgb[CGREEN], sphere[intersection_object].ka_rgb[CGREEN], sphere[intersection_object].shininess,
light_intensity, ambi_light_intensity);
blue = blinnphong_shading(¤t_intersection, &light, &view_point,
sphere[intersection_object].kd_rgb[CBLUE], sphere[intersection_object].ks_rgb[CBLUE], sphere[intersection_object].ka_rgb[CBLUE], sphere[intersection_object].shininess,
light_intensity, ambi_light_intensity);
}
tabelaPixlov[i][j].red = red;
tabelaPixlov[i][j].green = green;
tabelaPixlov[i][j].blue = blue;
glColor3f(tabelaPixlov[i][j].red, tabelaPixlov[i][j].green, tabelaPixlov[i][j].blue);
intersection_object = -1;
bShadow = false;
}
else
{
// draw the pixel with the background color
tabelaPixlov[i][j].red = 0;
tabelaPixlov[i][j].green = 0;
tabelaPixlov[i][j].blue = 0;
intersection_object = -1;
bShadow = false;
}
current_lambda = 0x7fefffffffffffff;
current_reflected_lambda = 0x7fefffffffffffff;
}
}
//glFlush();
stop = omp_get_wtime();
for (int i = 0; i < (viewport.xvmax - viewport.xvmin); i++)
{
for (int j = 0; j < (viewport.yvmax - viewport.yvmin); j++)
{
glColor3f(tabelaPixlov[i][j].red, tabelaPixlov[i][j].green, tabelaPixlov[i][j].blue);
glBegin(GL_POINTS);
glVertex2i(i, j);
glEnd();
}
}
printf("%f\n št niti:%d\n", stop - start, omp_get_max_threads());
glutSwapBuffers();
}
Answer (score: 0)
With ray tracing you should use schedule(dynamic). Besides that, I would suggest fusing the loops:
#pragma omp parallel for schedule(dynamic)
for (int n = 0; n < (viewport.xvmax - viewport.xvmin) * (viewport.yvmax - viewport.yvmin); n++) {
int i = n / (viewport.yvmax - viewport.yvmin);
int j = n % (viewport.yvmax - viewport.yvmin);
//... per-pixel work as before ...
}
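This helps because the cost per pixel varies a lot: background pixels return almost immediately, while pixels that hit a sphere also run the shadow loop and the shading. With a static schedule, whole stretches of expensive pixels can pile up on one thread; schedule(dynamic) hands out chunks to whichever thread is free, and fusing the two loops gives one large iteration space to draw those chunks from. Keep the OpenGL calls out of the parallel region, though: the glColor3f call inside the parallel loop is redundant, and an OpenGL context should not be driven from several threads, so writing into tabelaPixlov first and drawing in the serial loop afterwards is the right split.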
Also, why are you setting the number of threads at all? Just use the default, which should already be the number of logical cores. If you have Hyper-Threading, ray tracing is one of the algorithms that benefits from it, so you do not want to pin the thread count to the number of physical cores.
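If you want to check what the runtime picks on its own, here is a minimal sketch (assuming a compiler with OpenMP enabled, e.g. gcc -fopenmp; this is not part of the question's code):

#include <omp.h>
#include <stdio.h>

int main(void)
{
    /* Without any omp_set_num_threads() call, the team size defaults to the
       number of logical cores unless OMP_NUM_THREADS overrides it. */
    #pragma omp parallel
    {
        #pragma omp single
        printf("team size: %d, omp_get_max_threads(): %d\n",
               omp_get_num_threads(), omp_get_max_threads());
    }
    return 0;
}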
Besides MIMD with OpenMP, I would suggest using SIMD for ray tracing. See Ingo Wald's PhD thesis, http://www.sci.utah.edu/~wald/PhD/, for examples of how to do this. Basically, you put four (eight) rays into one SSE (AVX) register and then walk down the ray tree for all of them in parallel. If one ray finishes early, you mask it off and wait until all four are done (this is similar to what happens on a GPU). Many papers have since been written on more advanced tricks based on this idea.
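To make the packet idea concrete, here is a minimal sketch of an 8-wide ray/sphere hit test with AVX intrinsics. The RayPacket8 and Sphere types and the sphere_hit8 function are invented for this illustration (they are not from the question's code or from Wald's thesis), and it only shows the hit test itself, without the lambda bookkeeping or the masking of rays that have already terminated.

#include <immintrin.h>

/* 8 rays stored structure-of-arrays, so each coordinate fills one AVX register. */
typedef struct {
    __m256 ox, oy, oz;   /* ray origins                */
    __m256 dx, dy, dz;   /* normalized ray directions  */
} RayPacket8;

typedef struct { float cx, cy, cz, r; } Sphere;

/* Returns a lane mask: bit k is set if ray k's line intersects the sphere.
   With normalized d the quadratic is t^2 + 2*b*t + c0 = 0, where
   b = dot(d, o - c) and c0 = dot(o - c, o - c) - r^2, so real roots exist
   exactly when b*b - c0 >= 0. */
static inline int sphere_hit8(const RayPacket8 *p, const Sphere *s)
{
    __m256 lx = _mm256_sub_ps(p->ox, _mm256_set1_ps(s->cx));   /* o - c */
    __m256 ly = _mm256_sub_ps(p->oy, _mm256_set1_ps(s->cy));
    __m256 lz = _mm256_sub_ps(p->oz, _mm256_set1_ps(s->cz));

    __m256 b  = _mm256_add_ps(_mm256_mul_ps(p->dx, lx),
                _mm256_add_ps(_mm256_mul_ps(p->dy, ly),
                              _mm256_mul_ps(p->dz, lz)));
    __m256 c0 = _mm256_sub_ps(
                _mm256_add_ps(_mm256_mul_ps(lx, lx),
                _mm256_add_ps(_mm256_mul_ps(ly, ly),
                              _mm256_mul_ps(lz, lz))),
                _mm256_set1_ps(s->r * s->r));

    __m256 disc = _mm256_sub_ps(_mm256_mul_ps(b, b), c0);
    __m256 hit  = _mm256_cmp_ps(disc, _mm256_setzero_ps(), _CMP_GE_OQ);
    return _mm256_movemask_ps(hit);   /* one bit per ray lane */
}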