我最近开始使用 OpenMP 对我的图像处理项目进行多线程(MT)化。
除了一个函数之外,我没有遇到任何问题(该函数的计算量并不大,但浮点运算比其他函数更多)。
首先,假设单线程(ST)的结果为图像 X,多线程(MT)的结果为图像 Y。
当使用小窗口进行平均时,X == Y;但是当窗口变大(例如 5x5)时,X != Y。
于是我加入了一些打印语句(printf)来查看特定像素的值,结果——X 又等于 Y 了!这正是我想弄明白的地方:为什么在这段代码中加入打印之后,MT 的结果又变回了结果 X?
请注意,我尝试过将英特尔编译器的浮点模型改为 precise 和 extended:在这两种模型下,ST 与 MT 的结果相等,但新的 ST 结果 Z != X,并且运行时间比使用默认浮点模型时更长。
编辑:当前代码:
// Per-pixel texture computation: for every pixel (i, j) the code reads three
// plane coefficients from imFitA/imFitB/imFitC, accumulates the residuals of
// wrkSrc against that plane over a square window of half-size halfROI
// (clipped to the image), and stores the scaled sample standard deviation of
// those residuals in texture. Two hard-coded pixels are traced with printf.
// src, texture, wrkSrc, imFitA/B/C, halfROI, scaling and g_options are
// defined outside this snippet.

// Step used by both loops below; also used to derive the output indices.
const int tileOffset = 1;
// Row-pointer tables, cast to the element type each image is accessed with.
// NOTE(review): assumes texture holds unsigned char and the other images hold
// short elements - confirm against the image class.
unsigned char** texturePtr = (unsigned char**)texture->getRowPtr();
short** wrkSrcPtr = (short**)wrkSrc->getRowPtr();
short** imFitAPtr = (short**)imFitA->getRowPtr();
short** imFitBPtr = (short**)imFitB->getRowPtr();
short** imFitCPtr = (short**)imFitC->getRowPtr();
// now, compute raw texture value for each pixel using the above plane equations
// The parallel region is only active when g_options->ompThreaded is true and
// then requests g_options->ompNumberThreads threads.
#pragma omp parallel num_threads(g_options->ompNumberThreads) if(g_options->ompThreaded)
{
// Iterations of the outer (row) loop are divided among the threads. Every
// variable declared inside the loop body is local to one iteration; the only
// write to shared memory is texturePtr[ti][tj] at the end of the body.
#pragma omp for
for ( int i = 0; i < src->getHeight(); i = i + tileOffset ) {
for ( int j = 0; j < src->getWidth(); j = j + tileOffset ) {
// Debug tracing: printPoint becomes true for two hard-coded pixels. The row
// test uses src->getHeight() - iVal - 1, i.e. iVal is counted from the last
// image row, while the message prints iVal itself.
bool printPoint = false;
int jVal = 333;
int iVal = 99;
if ( j == jVal && i == src->getHeight() - iVal - 1 ) {
printPoint = true;
printf("\n\nAt (%d, %d) with Thread %d \n", jVal, iVal, omp_get_thread_num());
}
jVal = 343;
iVal = 204;
if ( j == jVal && i == src->getHeight() - iVal - 1 ) {
printPoint = true;
printf("\n\nAt (%d, %d) with Thread %d \n", jVal, iVal, omp_get_thread_num());
}
// Output coordinates in texture.
// NOTE(review): i and j already advance by tileOffset, so multiplying again
// applies the step twice; this is only equal to (i, j) while tileOffset == 1 -
// confirm the intent before changing tileOffset.
const int ti = i * tileOffset;
const int tj = j * tileOffset;
// Plane coefficients for this pixel, evaluated as (value / 32000.0f) * 255.0f
// in single precision.
// NOTE(review): presumably rescales a fixed-point short encoding to the
// 0..255 intensity range - confirm where the images are written.
const float planeA = imFitAPtr[i][j] / 32000.0f*255.0f;
const float planeB = imFitBPtr[i][j] / 32000.0f*255.0f;
const float planeC = imFitCPtr[i][j] / 32000.0f*255.0f;
// Running sum of squared residuals, running sum of residuals, and the number
// of window pixels that contributed.
// NOTE(review): these are single-precision accumulators feeding the one-pass
// variance formula below, which subtracts two quantities of similar size; the
// result may therefore be sensitive to how the compiler evaluates these float
// expressions - consider double accumulators if bit-stable output is needed.
float sum2 = 0.0f;
float sum = 0.0f;
int nbSum = 0;
if ( printPoint ) {
printf("Fit (A,B,C) = (%d, %d, %d) and In float (%f, %f, %f) \n",
imFitAPtr[i][j], imFitBPtr[i][j], imFitCPtr[i][j],
planeA, planeB, planeC);
}
// Visit the (2 * halfROI + 1) x (2 * halfROI + 1) window centred on (i, j).
for ( int ri = i - halfROI; ri <= i + halfROI; ri++ ) {
for ( int rj = j - halfROI; rj <= j + halfROI; rj++ ) {
// sanity checks (image boundaries): positions outside the image are skipped,
// so nbSum counts only in-bounds pixels
if ( ri < 0 || ri >= src->getHeight() || rj < 0 || rj >= src->getWidth() ) continue;
// eval the local plane at that pixel and compute the residual:
// planeA weights the column offset, planeB the row offset, planeC is the
// constant term; the source sample gets the same /32000*255 rescaling
const float localPlaneValue = planeA * ( rj - j ) + planeB * ( ri - i ) + planeC;
const float residual = wrkSrcPtr[ri][rj] / 32000.0f*255.0f - localPlaneValue;
const float rr = residual*residual;
// Trace the accumulator values before this sample is added.
if ( printPoint )
printf("Local: %f, residual: %f, resSQ: %f, sum2: %f and sum: %f \n ", localPlaneValue, residual, rr, sum2, sum);
sum2 += rr;
sum += residual;
nbSum++;
// Trace the accumulator values after this sample is added.
if ( printPoint )
printf("Add sum2: %f, add sum: %f and nb: %d \n ", sum2, sum, nbSum);
}
}
if ( printPoint )
printf("\n");
// the texture for that pixel is the stdev:
// sqrt( (sum2 - sum^2 / n) / (n - 1) ) * scaling, with the variance clamped
// at 0 by max() so that sqrtf never receives a negative argument. With fewer
// than two samples the value stays 0.
float texVal = 0.0f;
if ( nbSum > 1 ) {
texVal = sqrtf(max(( sum2 - sum * sum / nbSum ) / ( nbSum - 1 ), 0.0f)) * scaling;
// Saturate to the largest value an unsigned char can hold.
if ( texVal > 255.0f ) texVal = 255;
}
// The float-to-unsigned-char conversion drops the fractional part.
texturePtr[ti][tj] = (unsigned char)texVal;
if ( printPoint )
printf("Final value : %d (In float: %f) \n\n", texturePtr[ti][tj], texVal);
}
}
} // End OMP
通过这些“外部打印”,我注意到残差平方(rr)和平方和(sum2)是在 ST 与 MT 之间不一致的值。
答案 0 :(得分:0)
问题似乎与 Windows 下的编译器有关。
此代码是使用英特尔 Composer XE 2015 编译的。但是当我改用 Visual Studio v140 编译时,启用与不启用 OpenMP 得到的结果似乎是一致的。
我没有尝试使用较新的英特尔编译器(例如2017)。这个问题在Linux下的英特尔Composer XE 2015上不会发生。