C# 中取模运算的奇怪速度表现

时间:2019-06-22 01:11:31

标签: c#

为什么第一段代码比第二段要慢得多?

/// <summary>
/// Draws a non-negative 63-bit value from <c>Gen()</c>, retrying while the value
/// exceeds <c>long.MaxValue - mod</c>, then reduces it modulo <paramref name="mod"/>.
/// </summary>
/// <param name="mod">Used both for the rejection threshold and as the divisor of the final remainder.</param>
/// <returns>The accepted value modulo <paramref name="mod"/>.</returns>
public long Gen(int mod)
{
    // b is the largest value the loop below accepts (0x7fffffffffffffff is long.MaxValue).
    long a, b = 0x7fffffffffffffff - mod;

    do
    {
        // Dropping the low bit clears the top bit, so the cast to long is never negative.
        a = (long)(Gen() >> 1);
    } while (a > b);

    // Remainder by a runtime value: this is the variant the question reports as slow.
    return a % mod;
}

/// <summary>
/// Same rejection loop as the variable-modulus variant, but the final remainder
/// uses the literal 12345 instead of <paramref name="mod"/>.
/// </summary>
/// <param name="mod">Only used to compute the rejection threshold <c>long.MaxValue - mod</c>.</param>
/// <returns>The accepted value modulo 12345.</returns>
public long Gen(int mod)
{
    // b is the largest value the loop below accepts (0x7fffffffffffffff is long.MaxValue).
    long a, b = 0x7fffffffffffffff - mod;

    do
    {
        // Dropping the low bit clears the top bit, so the cast to long is never negative.
        a = (long)(Gen() >> 1);
    } while (a > b);

    // Remainder by a compile-time constant: this is the variant the question reports as fast.
    return a % 12345;
}

Gen()函数是一个64位无符号PRNG(请参见下文)。

问题在于,第一段代码的运行速度如此之慢,以至于使用变量来计算模数基本上会使计算随机数所需的时间增加3倍!更令人迷惑的是,当您删除循环并使用变量计算模数时,速度类似于第二段代码。

这里一定发生了一些奇怪的事情,因为使用变量取模的开销不可能达到下面这个函数的好几倍:

/// <summary>
/// Returns the next 64-bit output. Outputs are served from a four-word block;
/// a new block is computed on every fourth call, when <c>counter</c> wraps to 0.
/// </summary>
/// <returns>The word of <c>block</c> selected by the current <c>counter</c>.</returns>
public ulong Gen()
{
    // Advance the output index, wrapping 3 -> 0.
    counter = (counter + 1) & 3;

    if (counter == 0)
    {
        // state[0] is bumped once per block; it is the only state word this method writes.
        state[0]++;

        ulong x0 = state[0];
        ulong x1 = state[1];
        ulong x2 = state[2];
        ulong x3 = state[3];

        // Two rounds of add / rotate-left / xor mixing on the local copies.
        // Each (x << r) | (x >> (64 - r)) is a 64-bit left rotation by r.
        for (int i = 0; i < 2; i++)
        {
            // Rotation amount 32, pairing (x0, x1) and (x2, x3).
            x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
            x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
            x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
            x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));

            // Rotation amount 27, pairing (x0, x2) and (x1, x3).
            x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
            x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
            x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
            x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));

            // Rotation amount 11, pairing (x0, x3) and (x1, x2).
            x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
            x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
            x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
            x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
        }

        // Publish the mixed words as the next four outputs.
        block[0] = x0;
        block[1] = x1;
        block[2] = x2;
        block[3] = x3;
    }

    return block[counter];
}

要求的最小可复制版本:

using System;
using System.Diagnostics;

/// <summary>
/// Minimal repro from the question: times 10,000,000 calls each of
/// <c>GenConstant</c>, <c>GenNoLoop</c> and <c>GenVariable</c> and prints the
/// elapsed milliseconds plus the running sum after each section.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        Stopwatch sw = new Stopwatch();
        Arx rng = new Arx();
        // Running sum of all results across the three sections; printed in hex after each one.
        // Presumably it also keeps the calls from being optimized away - confirm.
        long a = 0;

        // NOTE(review): there is no warm-up pass, so each timed loop likely includes
        // the JIT compilation of the method it calls for the first time - confirm.

        // constant = fast

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenConstant(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();

        // no loop = fast

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenNoLoop(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();

        // modulus variable = slow

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenVariable(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();
    }
}

/// <summary>
/// Add-rotate-xor pseudo-random generator producing 64-bit outputs in blocks of four,
/// plus three helpers that reduce an output to a range for the benchmark.
/// All generator state is held in static fields, so every instance shares it.
/// </summary>
class Arx
{
    // Input words for the mixing rounds; Gen() increments state[0] once per block.
    static public ulong[] state = new ulong[4];
    // The four most recently generated output words.
    static public ulong[] outBlock = new ulong[4];

    // Index of the last word returned from outBlock; -1 makes the first Gen() call wrap to 0
    // and generate the first block.
    static int counter = -1;

    /// <summary>
    /// Seeds <c>state[1]</c>. A seed of 0 (the default) selects the current UTC tick count instead.
    /// </summary>
    /// <param name="seed">Value stored in <c>state[1]</c>, or 0 to seed from the clock.</param>
    public Arx(ulong seed = 0)
    {
        if (seed == 0)
            state[1] = (ulong)DateTime.UtcNow.Ticks;

        else
            state[1] = seed;
    }

    /// <summary>
    /// Returns the next 64-bit output; a new block of four is computed on every
    /// fourth call, when <c>counter</c> wraps to 0.
    /// </summary>
    /// <returns>The word of <c>outBlock</c> selected by the current <c>counter</c>.</returns>
    public ulong Gen()
    {
        // Advance the output index, wrapping 3 -> 0.
        counter = (counter + 1) & 3;

        if (counter == 0)
        {
            // state[0] is the only state word this method writes.
            state[0]++;

            ulong x0 = state[0];
            ulong x1 = state[1];
            ulong x2 = state[2];
            ulong x3 = state[3];

            // Two rounds of add / rotate-left / xor mixing on the local copies.
            // Each (x << r) | (x >> (64 - r)) is a 64-bit left rotation by r.
            for (int i = 0; i < 2; i++)
            {
                // Rotation amount 32, pairing (x0, x1) and (x2, x3).
                x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
                x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
                x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
                x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));

                // Rotation amount 27, pairing (x0, x2) and (x1, x3).
                x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
                x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
                x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
                x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));

                // Rotation amount 11, pairing (x0, x3) and (x1, x2).
                x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
                x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
                x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
                x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
            }

            // Publish the mixed words as the next four outputs.
            outBlock[0] = x0;
            outBlock[1] = x1;
            outBlock[2] = x2;
            outBlock[3] = x3;
        }

        return outBlock[counter];
    }

    /// <summary>
    /// Rejection loop followed by a remainder with the literal 12345.
    /// </summary>
    /// <param name="mod">Only used to compute the rejection threshold <c>long.MaxValue - mod</c>.</param>
    /// <returns>The accepted 63-bit value modulo 12345.</returns>
    public long GenConstant(int mod)
    {
        // b is the largest value the loop accepts (0x7fffffffffffffff is long.MaxValue).
        long a, b = 0x7fffffffffffffff - mod;

        do
        {
            // Dropping the low bit clears the top bit, so the cast to long is never negative.
            a = (long)(Gen() >> 1);
        } while (a > b);

        return a % 12345;
    }

    /// <summary>
    /// Rejection loop followed by a remainder with the runtime value <paramref name="mod"/>.
    /// </summary>
    /// <param name="mod">Used for the rejection threshold and as the divisor.</param>
    /// <returns>The accepted 63-bit value modulo <paramref name="mod"/>.</returns>
    public long GenVariable(int mod)
    {
        // NOTE(review): accepting 0..(long.MaxValue - mod) leaves 2^63 - mod candidates, which is
        // a multiple of mod only when mod divides 2^63, so the result looks slightly non-uniform
        // for other moduli - confirm whether exact uniformity is intended.
        long a, b = 0x7fffffffffffffff - mod;

        do
        {
            // Dropping the low bit clears the top bit, so the cast to long is never negative.
            a = (long)(Gen() >> 1);
        } while (a > b);

        return a % mod;
    }

    /// <summary>
    /// Single draw reduced modulo <paramref name="mod"/>, with no rejection loop.
    /// </summary>
    /// <param name="mod">Divisor of the final remainder.</param>
    /// <returns>One 63-bit draw modulo <paramref name="mod"/>.</returns>
    public long GenNoLoop(int mod)
    {
        long a = (long)(Gen() >> 1);

        return a % mod;
    }
}

2 个答案:

答案 0 :(得分:1)

这是一个优化程序问题。

首先,毫无疑问,使用变量要比使用常量慢,因为加载变量需要更多时间。

但是,当您删除循环部分时,方法变得很简单,优化器会将它内联。并且请注意,方法被内联之后,rng.GenNoLoop(123)中的表达式a % mod里的mod就可以被识别为常量。因此,这两种写法现在是等价的。

要恢复未优化状态,您需要将实变量传递给GenNoLoop

// Holding the modulus in a static field instead of passing the literal 123 means the
// call site no longer supplies a compile-time constant.
static int mod = 123;

static void Main(string[] args)
{
    // rng is the Arx instance from the question's benchmark (not declared in this fragment).
    rng.GenNoLoop(mod);
}

另一种选择是强制该方法无内联

// Alternative from the answer: NoInlining asks the JIT not to inline this method,
// so mod remains a runtime parameter at the a % mod site.
// MethodImpl and MethodImplOptions live in System.Runtime.CompilerServices.
[MethodImpl(MethodImplOptions.NoInlining)]
public long GenNoLoop(int mod)

答案 1 :(得分:0)

我使用此代码来测试两种方法的速度:

// Benchmark from the second answer: alternates 100,000-call batches of GenVariable and
// GenConstant 1000 times and sums the elapsed time of each variant separately.
// NOTE(review): .Dump() is not a BCL method; presumably this is a LINQPad script - confirm.
void Main()
{
    Stopwatch sw = new Stopwatch();
    // ts1 accumulates GenVariable time, ts2 accumulates GenConstant time.
    var ts1 = TimeSpan.Zero;
    var ts2 = TimeSpan.Zero;

    Arx rng = new Arx();

    for (var x = 0; x < 1000; x++)
    {
        // Sum of the results of the current batch; it is never read afterwards.
        long a = 0;
        sw.Start();
        for (int i = 0; i < 100000; i++)
        {
            a += rng.GenVariable(123);
        }
        sw.Stop();
        ts1 += sw.Elapsed;
        sw.Reset();

        a = 0;
        sw.Start();
        for (int i = 0; i < 100000; i++)
        {
            a += rng.GenConstant(123);
        }
        sw.Stop();
        ts2 += sw.Elapsed;
        sw.Reset();     
    }

    // Total milliseconds for the variable-modulus and constant-modulus variants, in that order.
    ts1.TotalMilliseconds.Dump();
    ts2.TotalMilliseconds.Dump();
}

我分别获得了 2890.5391 毫秒和 2805.8824 毫秒。使用变量的版本仅慢 3%,差别不大。