我正在尝试在C中实现Sobel Filter的并行化+矢量化版本,其中OpenMP pragma用于并行化,而#pragma simd用于矢量化。我的输入是1024 x 1024的.pgm图像。我正在使用以下命令在Xeon Knights Landing处理器上使用英特尔编译器进行编译:
icc -qopenmp -O3 -qopt-report3 xeon.c -o xeon
因此,我面对的问题一般是:
a)何时应该并行化,何时应该矢量化?我有一个由四层for循环组成的嵌套循环——我应该对这段代码进行并行化还是矢量化?
b)我的'min'和'max'的值是错误的。它们都是共享变量,因此容易出现竞争条件,所以我在它们周围加上了#pragma omp critical。但是,这两个变量打印出来的值仍然是错误的,我不知道为什么。我甚至在print语句之前添加了一个屏障(barrier),以确保所有线程在打印最小值和最大值之前都已经通过该临界区。
c)#pragma omp critical让我的程序变得非常慢。实际上,执行时间甚至比顺序运行时还要长。有什么方法可以避免吗?

mypgm.h
/* pgm file IO headerfile ------ mypgm.h */
/* Constant declaration */
#define MAX_IMAGEWIDTH 1024   /* Largest accepted image width (pixels per row) */
#define MAX_IMAGEHEIGHT 1024  /* Largest accepted image height (number of rows) */
#define MAX_BRIGHTNESS 255 /* Maximum gray level */
#define GRAYLEVEL 256 /* No. of gray levels */
#define MAX_FILENAME 256 /* Filename length limit */
#define MAX_BUFFERSIZE 256 /* Size of the line buffer used for header parsing */
/* Global constant declaration */
/* Image storage arrays.
 * Every access in this file is of the form image[y][x], and the loaders
 * bound x by MAX_IMAGEWIDTH and y by MAX_IMAGEHEIGHT, so the first
 * dimension must be the height and the second the width. Declaring them
 * the other way round only works while both limits happen to be equal. */
float image1[MAX_IMAGEHEIGHT][MAX_IMAGEWIDTH] __attribute__((aligned(64))),
      image2[MAX_IMAGEHEIGHT][MAX_IMAGEWIDTH] __attribute__((aligned(64)));
int x_size1, y_size1, /* width & height of image1*/
    x_size2, y_size2; /* width & height of image2 */
/* Prototype declaration of functions */
void load_image_data( ); /* image input  (asks for the file name) */
void save_image_data( ); /* image output (asks for the file name) */
void load_image_file(char *); /* image input  from the named file */
void save_image_file(char *); /* image output to the named file */
/* Main body of functions */
void load_image_data( )
/* Interactive input of a binary (P5) pgm file.                     */
/* Asks for a file name on stdin, parses the header and fills       */
/* image1[ ][ ], x_size1, y_size1.                                  */
/* Any error (bad name, bad format, truncated file, unsupported     */
/* size or gray level) prints a message and terminates via exit(1). */
{
    char file_name[MAX_FILENAME];
    char buffer[MAX_BUFFERSIZE];
    unsigned char row[MAX_IMAGEWIDTH]; /* One raw scanline */
    FILE *fp; /* File pointer */
    int max_gray; /* Maximum gray level */
    int x, y; /* Loop variable */

    /* Input file open */
    printf("\n-----------------------------------------------------\n");
    printf("Monochromatic image file input routine \n");
    printf("-----------------------------------------------------\n\n");
    printf(" Only pgm binary file is acceptable\n\n");
    printf("Name of input image file? (*.pgm) : ");
    /* Field width 255 == MAX_FILENAME - 1: never overrun file_name. */
    if (scanf("%255s", file_name) != 1) {
        printf(" No file name was given!\n\n");
        exit(1);
    }
    fp = fopen(file_name, "rb");
    if (NULL == fp) {
        printf(" The file doesn't exist!\n\n");
        exit(1);
    }

    /* Check of file-type ---P5 */
    if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL ||
        buffer[0] != 'P' || buffer[1] != '5') {
        printf(" Mistaken file format, not P5!\n\n");
        exit(1);
    }

    /* input of x_size1, y_size1 (lines starting with '#' are comments) */
    x_size1 = 0;
    y_size1 = 0;
    while (x_size1 == 0 || y_size1 == 0) {
        /* Without this check a truncated header would loop forever. */
        if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL) {
            printf(" Unexpected end of file in header!\n\n");
            exit(1);
        }
        if (buffer[0] != '#') {
            sscanf(buffer, "%d %d", &x_size1, &y_size1);
        }
    }

    /* input of max_gray */
    max_gray = 0;
    while (max_gray == 0) {
        if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL) {
            printf(" Unexpected end of file in header!\n\n");
            exit(1);
        }
        if (buffer[0] != '#') {
            sscanf(buffer, "%d", &max_gray);
        }
    }

    /* Display of parameters */
    printf("\n Image width = %d, Image height = %d\n", x_size1, y_size1);
    printf(" Maximum gray level = %d\n\n",max_gray);
    if (x_size1 < 1 || y_size1 < 1) {
        printf(" Invalid image size!\n\n");
        exit(1);
    }
    if (x_size1 > MAX_IMAGEWIDTH || y_size1 > MAX_IMAGEHEIGHT) {
        printf(" Image size exceeds %d x %d\n\n",
               MAX_IMAGEWIDTH, MAX_IMAGEHEIGHT);
        printf(" Please use smaller images!\n\n");
        exit(1);
    }
    if (max_gray != MAX_BRIGHTNESS) {
        printf(" Invalid value of maximum gray level!\n\n");
        exit(1);
    }

    /* Input of image data.
     * File reads are inherently sequential, so no SIMD pragma is used on
     * the I/O itself: one scanline is read with a single fread and only
     * the byte -> float conversion is left as a plain, vectorisable loop. */
    for (y = 0; y < y_size1; y++) {
        if (fread(row, 1, (size_t)x_size1, fp) != (size_t)x_size1) {
            printf(" Image data is truncated!\n\n");
            exit(1);
        }
        for (x = 0; x < x_size1; x++) {
            image1[y][x] = (float)row[x];
        }
    }
    printf("-----Image data input OK-----\n\n");
    printf("-----------------------------------------------------\n\n");
    fclose(fp);
}
void save_image_data( )
/* Interactive output of image2[ ][ ], x_size2, y_size2 in pgm format. */
/* Asks for a file name on stdin and writes a binary (P5) file.        */
/* Prints a message and terminates via exit(1) if the name cannot be   */
/* read, the file cannot be opened, or writing fails.                  */
{
    char file_name[MAX_FILENAME];
    FILE *fp; /* File pointer */
    int x, y; /* Loop variable */

    /* Output file open */
    printf("-----------------------------------------------------\n");
    printf("Monochromatic image file output routine\n");
    printf("-----------------------------------------------------\n\n");
    printf("Name of output image file? (*.pgm) : ");
    /* Field width 255 == MAX_FILENAME - 1: never overrun file_name. */
    if (scanf("%255s", file_name) != 1) {
        printf(" No file name was given!\n\n");
        exit(1);
    }
    fp = fopen(file_name, "wb");
    if (NULL == fp) {
        printf(" The file cannot be opened for writing!\n\n");
        exit(1);
    }

    /* output of pgm file header information */
    fputs("P5\n", fp);
    fputs("# Created by Image Processing\n", fp);
    fprintf(fp, "%d %d\n", x_size2, y_size2);
    fprintf(fp, "%d\n", MAX_BRIGHTNESS);

    /* Output of image data.
     * Stream output is sequential and already buffered by stdio, so the
     * loops carry no SIMD pragma. The float pixel is truncated to int and
     * fputc stores its low byte. */
    for (y = 0; y < y_size2; y++) {
        for (x = 0; x < x_size2; x++) {
            fputc((int)image2[y][x], fp);
        }
    }

    /* fclose flushes the buffer, so it is where a full disk shows up. */
    if (ferror(fp) || fclose(fp) != 0) {
        printf(" Writing the file failed!\n\n");
        exit(1);
    }
    printf("\n-----Image data output OK-----\n\n");
    printf("-----------------------------------------------------\n\n");
}
void load_image_file(char *filename)
/* Non-interactive input of a binary (P5) pgm file.                 */
/* filename: path of the file to read.                              */
/* Fills image1[ ][ ], x_size1, y_size1.                            */
/* Any error (missing file, bad format, truncated file, unsupported */
/* size or gray level) prints a message and terminates via exit(1). */
{
    char buffer[MAX_BUFFERSIZE];
    unsigned char row[MAX_IMAGEWIDTH]; /* One raw scanline */
    FILE *fp; /* File pointer */
    int max_gray; /* Maximum gray level */
    int x, y; /* Loop variable */

    /* Input file open */
    fp = fopen(filename, "rb");
    if (NULL == fp) {
        printf(" The file doesn't exist!\n\n");
        exit(1);
    }

    /* Check of file-type ---P5 */
    if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL ||
        buffer[0] != 'P' || buffer[1] != '5') {
        printf(" Mistaken file format, not P5!\n\n");
        exit(1);
    }

    /* input of x_size1, y_size1 (lines starting with '#' are comments) */
    x_size1 = 0;
    y_size1 = 0;
    while (x_size1 == 0 || y_size1 == 0) {
        /* Without this check a truncated header would loop forever. */
        if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL) {
            printf(" Unexpected end of file in header!\n\n");
            exit(1);
        }
        if (buffer[0] != '#') {
            sscanf(buffer, "%d %d", &x_size1, &y_size1);
        }
    }

    /* input of max_gray */
    max_gray = 0;
    while (max_gray == 0) {
        if (fgets(buffer, MAX_BUFFERSIZE, fp) == NULL) {
            printf(" Unexpected end of file in header!\n\n");
            exit(1);
        }
        if (buffer[0] != '#') {
            sscanf(buffer, "%d", &max_gray);
        }
    }

    if (x_size1 < 1 || y_size1 < 1) {
        printf(" Invalid image size!\n\n");
        exit(1);
    }
    if (x_size1 > MAX_IMAGEWIDTH || y_size1 > MAX_IMAGEHEIGHT) {
        printf(" Image size exceeds %d x %d\n\n",
               MAX_IMAGEWIDTH, MAX_IMAGEHEIGHT);
        printf(" Please use smaller images!\n\n");
        exit(1);
    }
    if (max_gray != MAX_BRIGHTNESS) {
        printf(" Invalid value of maximum gray level!\n\n");
        exit(1);
    }

    /* Input of image data.
     * File reads are inherently sequential, so no SIMD pragma is used on
     * the I/O itself: one scanline is read with a single fread and only
     * the byte -> float conversion is left as a plain, vectorisable loop. */
    for (y = 0; y < y_size1; y++) {
        if (fread(row, 1, (size_t)x_size1, fp) != (size_t)x_size1) {
            printf(" Image data is truncated!\n\n");
            exit(1);
        }
        for (x = 0; x < x_size1; x++) {
            image1[y][x] = (float)row[x];
        }
    }
    fclose(fp);
}
void save_image_file(char *filename)
/* Non-interactive output of image2[ ][ ], x_size2, y_size2         */
/* into a binary (P5) pgm file with header & body information.      */
/* filename: path of the file to create or overwrite.               */
/* Prints a message and terminates via exit(1) if the file cannot   */
/* be opened or writing fails.                                      */
{
    FILE *fp; /* File pointer */
    int x, y; /* Loop variable */

    fp = fopen(filename, "wb");
    if (NULL == fp) {
        printf(" The file cannot be opened for writing!\n\n");
        exit(1);
    }

    /* output of pgm file header information */
    fputs("P5\n", fp);
    fputs("# Created by Image Processing\n", fp);
    fprintf(fp, "%d %d\n", x_size2, y_size2);
    fprintf(fp, "%d\n", MAX_BRIGHTNESS);

    /* Output of image data.
     * Stream output is sequential and already buffered by stdio, so the
     * loops carry no SIMD pragma. The float pixel is truncated to int and
     * fputc stores its low byte. */
    for (y = 0; y < y_size2; y++) {
        for (x = 0; x < x_size2; x++) {
            fputc((int)image2[y][x], fp);
        }
    }

    /* fclose flushes the buffer, so it is where a full disk shows up. */
    if (ferror(fp) || fclose(fp) != 0) {
        printf(" Writing the file failed!\n\n");
        exit(1);
    }
}
xeon.c
/* sobel.c */
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <time.h>
#include <omp.h>
#include "mypgm.h"
void sobel_filtering( )
/* Spatial filtering of image data */
/* Sobel filter (horizontal differentiation */
/* Input: image1[y][x] ---- Outout: image2[y][x] */
{
/* Definition of Sobel filter in horizontal direction */
float weight[3][3] __attribute__((aligned(64)))= {{ -1, 0, 1 },
{ -2, 0, 2 },
{ -1, 0, 1 }};
float pixel_value;
float min, max;
int x, y, i, j; /* Loop variable */
/* Maximum values calculation after filtering*/
printf("Now, filtering of input image is performed\n\n");
min = DBL_MAX;
max = -DBL_MAX;
#pragma omp parallel shared(image2,weight,image1,min,max) private(y,x,j,i)
{
#pragma omp for collapse(2)
for (y=0;y<y_size1;y++) {
for (x=0;x<x_size1;x++) {
image2[y][x]=0;
}
}
#pragma omp for collapse(2) reduction(+:pixel_value)
for (y = 1; y < y_size1 - 1; y++) {
//#pragma simd
for (x = 1; x < x_size1 - 1; x++) {
pixel_value = 0.0;
#pragma simd
//#pragma omp for collapse(2)
for (j = -1; j <= 1; j++) {
#pragma simd
for (i = -1; i <= 1; i++) {
pixel_value += weight[j + 1][i + 1] * image1[y + j][x + i];
}
}
image2[y][x] = (float)pixel_value;
#pragma omp critical
{
if (pixel_value < min)
min = pixel_value;
if (pixel_value > max)
max = pixel_value;
}
}
}
#pragma omp barrier
#pragma omp single
{
if ((int)(max - min) == 0) {
printf("Nothing exists!!!\n\n");
exit(1);
}
printf("%f\n",min);
printf("%f\n",max);
}
/* Generation of image2 after linear transformtion */
#pragma omp for private(x) collapse(2)
//#pragma simd
for (y=1;y<y_size1-1;y++) {
//#pragma simd
for (x=1;x<x_size1-1;x++) {
image2[y][x] = MAX_BRIGHTNESS * (image2[y][x] - min) / (max - min);
}
}
} // ends the parallel section
} //end of sobel filtering function
/* Loads an image interactively, applies the Sobel filter while timing it,
 * prints the elapsed time and saves the result interactively.
 * Returns 0 on success; the called routines exit(1) on error. */
int main( )
{
    double begin, end, time_spent;

    load_image_data( ); /* Input of image1 */

    /* Wall-clock timing. clock() reports CPU time summed over all threads,
     * which makes a multi-threaded run look slower than the serial one
     * even when it finishes sooner. */
    begin = omp_get_wtime();
    sobel_filtering( ); /* Sobel filter is applied to image1 */
    end = omp_get_wtime();

    time_spent = end - begin;
    printf("\n\nTiming result of Sobel filtering: %f\n",time_spent);
    save_image_data( ); /* Output of image2 */
    return 0;
}
谢谢