使用TPL拆分for-cycle

时间:2014-10-05 19:46:35

标签: c# task-parallel-library

为了并行化和加速计算,我使用TPL将一个长的for-cycle分成两个短的for-cycles,这些部分在我的课程中称为PointGenerator和PointGenerator2:

class CalcPiTPL
    {
        int n;
        int totalCounter;
        int counter1;
        int counter2;
        double aPi;
        public StringBuilder Msg; // diagonstic message
        Stopwatch stopWatch = new Stopwatch();

        public void Init(int aN)
        {
            stopWatch.Start();
            n = aN; // save total calculate-iterations amount
            aPi = -1; // flag, if no any calculate-iteration has been completed
            Msg = new StringBuilder("No any calculate-iteration has been completed");
        }
        public void Run()
        {
            if (n < 1)
            {
                Msg = new StringBuilder("Invalid N-value");
                return;
            }

            Task[] tasks = new Task[2];
            tasks[0] = Task.Factory.StartNew((obj) => { PointGenerator((int)obj); }, n);
            tasks[1] = Task.Factory.StartNew((obj) => { PointGenerator2((int)obj); }, n);

            Task.WaitAll(tasks[0], tasks[1]);
            totalCounter = counter1 + counter2;
            aPi = 4.0 * ((double)totalCounter / (double)n); // to calculate approximate Pi - value
            Console.WriteLine(aPi);
            stopWatch.Stop();
            TimeSpan ts = stopWatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
            ts.Hours, ts.Minutes, ts.Seconds,
            ts.Milliseconds / 10);
            Console.WriteLine("RunTime " + elapsedTime);
        }
        public double Done()
        {
            if (aPi > 0)
            {
                Msg = new StringBuilder("Calculates has been completed successful");
                return aPi; // return gotten value
            }
            else
            {
                return 0; // no result
            }
        }
        public void PointGenerator(int n)//FIRST PART OF ONE BIG FOR-CYCLE
        {
            double s = 0.125;
            double sP = s / (n / 2);
            double x = Math.Sqrt(sP);
            for (double cX = 0; cX <= 0.25; cX += x)
            {
                for (double cY = 0; cY <= 0.5; cY += x)
                {
                    if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
                    {
                        counter1++; // coordinate in a circle! mark it by incrementing N_0
                    }
                }
            }
        }

        public void PointGenerator2(int n)//SECOND PART OF ONE BIG FOR-CYCLE
        {
            double s = 0.125;
            double sP = s / (n / 2);
            double x = Math.Sqrt(sP);
            for (double cX = 0.25; cX <= 0.5; cX += x)
            {
                for (double cY = 0; cY <= 0.5; cY += x)
                {
                    if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
                    {
                        counter2++; // coordinate in a circle! mark it by incrementing N_0
                    }
                }
            }
        }
    }

这是没有使用任务(TPL)的同一个类,它有一个很长的for-cycle:

class TCalcPi//unparallel calculating method
    {
        int N;
        int n_0;
        double aPi;
        public StringBuilder Msg; // diagnostic message


        Stopwatch stopWatch = new Stopwatch();

        public void Init(int aN)
        {
            stopWatch.Start();
            N = aN; // save total calculate-iterations amount
            aPi = -1; // flag, if no any calculate-iteration has been completed
            Msg = new StringBuilder("No any calculate-iteration has been completed");
        }

        public void Run()
        {
            if (N < 1)
            {
                Msg = new StringBuilder("Invalid N - value");
                return;
            }

            double s = 0.25;
            double sP = s / N;
            double x = Math.Sqrt(sP);
            for (double cX = 0; cX <= 0.5; cX += x)//ONE LONG FOR-CYCLE
            {
                for(double cY = 0; cY <= 0.5; cY += x)
                {
                    if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
                    {
                        n_0++; // coordinate in a circle! mark it by incrementing N_0
                    }
                }
            }
            aPi = 4.0 * ((double)n_0 / (double)N); // to calculate approximate Pi - value
            stopWatch.Stop();
            TimeSpan ts = stopWatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
            ts.Hours, ts.Minutes, ts.Seconds,
            ts.Milliseconds / 10);
            Console.WriteLine("RunTime " + elapsedTime);
        }
        public double Done()
        {
            if (aPi > 0)
            {
                Msg = new StringBuilder("Calculates has been completed successful");
                return aPi; // return gotten value
            }
            else
            {
                return 0; // no result
            }
        }
    }

但是,非并行化类比并行化(使用TPL)类工作得更快。如何解决?

1 个答案:

答案 0 :(得分:1)

counter1counter2很可能位于相同的缓存行中,因为它们在内存中相邻。这会导致False Sharing。也许你经常增加这些计数器。对于计数器之间的每次交替,这会在两个核心的L1之间ping高速缓存行。

将它们分开。作为概念的证明,像这样:

int counter1;
long padding0, p1, p2, p3, p4, p5, p6, p7; //64 bytes padding
int counter2;

我们希望JIT不会对字段进行重新排序。也许您需要使用StructLayout

或者,使计数器局部变量。堆栈变量只有非常巧合的错误共享。