为了并行化和加速计算,我使用TPL将一个长的for-cycle分成两个短的for-cycles,这些部分在我的课程中称为PointGenerator和PointGenerator2:
class CalcPiTPL
{
int n;
int totalCounter;
int counter1;
int counter2;
double aPi;
public StringBuilder Msg; // diagonstic message
Stopwatch stopWatch = new Stopwatch();
public void Init(int aN)
{
stopWatch.Start();
n = aN; // save total calculate-iterations amount
aPi = -1; // flag, if no any calculate-iteration has been completed
Msg = new StringBuilder("No any calculate-iteration has been completed");
}
public void Run()
{
if (n < 1)
{
Msg = new StringBuilder("Invalid N-value");
return;
}
Task[] tasks = new Task[2];
tasks[0] = Task.Factory.StartNew((obj) => { PointGenerator((int)obj); }, n);
tasks[1] = Task.Factory.StartNew((obj) => { PointGenerator2((int)obj); }, n);
Task.WaitAll(tasks[0], tasks[1]);
totalCounter = counter1 + counter2;
aPi = 4.0 * ((double)totalCounter / (double)n); // to calculate approximate Pi - value
Console.WriteLine(aPi);
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine("RunTime " + elapsedTime);
}
public double Done()
{
if (aPi > 0)
{
Msg = new StringBuilder("Calculates has been completed successful");
return aPi; // return gotten value
}
else
{
return 0; // no result
}
}
public void PointGenerator(int n)//FIRST PART OF ONE BIG FOR-CYCLE
{
double s = 0.125;
double sP = s / (n / 2);
double x = Math.Sqrt(sP);
for (double cX = 0; cX <= 0.25; cX += x)
{
for (double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
counter1++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
}
public void PointGenerator2(int n)//SECOND PART OF ONE BIG FOR-CYCLE
{
double s = 0.125;
double sP = s / (n / 2);
double x = Math.Sqrt(sP);
for (double cX = 0.25; cX <= 0.5; cX += x)
{
for (double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
counter2++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
}
}
这是没有使用任务(TPL)的同一个类,它有一个很长的for-cycle:
class TCalcPi//unparallel calculating method
{
int N;
int n_0;
double aPi;
public StringBuilder Msg; // diagnostic message
Stopwatch stopWatch = new Stopwatch();
public void Init(int aN)
{
stopWatch.Start();
N = aN; // save total calculate-iterations amount
aPi = -1; // flag, if no any calculate-iteration has been completed
Msg = new StringBuilder("No any calculate-iteration has been completed");
}
public void Run()
{
if (N < 1)
{
Msg = new StringBuilder("Invalid N - value");
return;
}
double s = 0.25;
double sP = s / N;
double x = Math.Sqrt(sP);
for (double cX = 0; cX <= 0.5; cX += x)//ONE LONG FOR-CYCLE
{
for(double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
n_0++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
aPi = 4.0 * ((double)n_0 / (double)N); // to calculate approximate Pi - value
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine("RunTime " + elapsedTime);
}
public double Done()
{
if (aPi > 0)
{
Msg = new StringBuilder("Calculates has been completed successful");
return aPi; // return gotten value
}
else
{
return 0; // no result
}
}
}
但是,非并行化类比并行化(使用TPL)类工作得更快。如何解决?
答案 0 :(得分:1)
counter1
和counter2
很可能位于相同的缓存行中,因为它们在内存中相邻。这会导致False Sharing。也许你经常增加这些计数器。对于计数器之间的每次交替,这会在两个核心的L1之间ping高速缓存行。
将它们分开。作为概念的证明,像这样:
int counter1;
long padding0, p1, p2, p3, p4, p5, p6, p7; //64 bytes padding
int counter2;
我们希望JIT不会对字段进行重新排序。也许您需要使用StructLayout
。
或者,使计数器局部变量。堆栈变量只有非常巧合的错误共享。