为什么Regex CompileToAssembly比编译的正则表达式和解释的正则表达式提供更慢的性能?

时间:2012-04-09 20:16:05

标签: c# .net regex

我使用以下代码来比较CompileToAssembly与编译的正则表达式(RegexOptions.Compiled)的性能,但结果不符合预期。请告诉我我遗漏了什么。谢谢!

// The command pattern constructed with RegexOptions.Compiled.
static readonly Regex regex = new Regex(@"(stats|pause\s?(all|\d+(\,\d+)*)|start\s?(all|\d+(\,\d+)*)|add\s?time\s?(all|\d+(\,\d+)*)(\s\d+)|c(?:hange)?\s?p(?:asskey)?|close)(.*)", RegexOptions.Compiled);
// The same pattern constructed with no RegexOptions.
static readonly Regex reg = new Regex(@"(stats|pause\s?(all|\d+(\,\d+)*)|start\s?(all|\d+(\,\d+)*)|add\s?time\s?(all|\d+(\,\d+)*)(\s\d+)|c(?:hange)?\s?p(?:asskey)?|close)(.*)");
// An instance of DuplicatedString, the type name that CompileToAssembly() passes to RegexCompilationInfo.
// NOTE(review): this is a static field initializer, so it is not sequenced after the
// CompileToAssembly() call made inside Main() - confirm which assembly actually supplies DuplicatedString.
static readonly Regex level4 = new DuplicatedString();

    /// <summary>
    /// Runs <c>itr</c> calls to <c>Match</c> against the same input string for each of the
    /// three static Regex fields (regex, level4, reg) and prints the elapsed milliseconds
    /// of each loop to the console.
    /// </summary>
    static void Main()
    {
        const string str = "add time 243,3453,43543,543,534534,54534543,345345,4354354235,345435,34543534 6873brekgnfkjerkgiengklewrij";
        const int itr = 1000000;
        // Generate the regex assembly before any timing starts.
        // NOTE(review): level4 is created by a static field initializer rather than by this
        // call - confirm that the timed object really comes from the assembly generated here.
        CompileToAssembly();
        // Receives each Match result; it is assigned in the loops but never read in this method.
        Match match;
        Stopwatch sw = new Stopwatch();
        // Loop 1: the Regex built with RegexOptions.Compiled.
        sw.Start();
        for (int i = 0; i < itr; i++)
        {
             match = regex.Match(str);
        }
        sw.Stop();
        Console.WriteLine("RegexOptions.Compiled: {0}ms", sw.ElapsedMilliseconds);

        // Loop 2: the DuplicatedString instance held in level4. The stopwatch is reset so
        // the previous loop's time is not included.
        sw.Reset();
        sw.Start();
        for (int i = 0; i < itr; i++)
        {
            match = level4.Match(str);
        }
        sw.Stop();

        Console.WriteLine("CompiledToAssembly: {0}ms", sw.ElapsedMilliseconds);

        // Loop 3: the Regex built without any RegexOptions.
        sw.Reset();
        sw.Start();
        for (int i = 0; i < itr; i++)
        {
            match = reg.Match(str);
        }
        sw.Stop();
        Console.WriteLine("Interpreted: {0}ms", sw.ElapsedMilliseconds);
        // Blocks until a line is read from standard input.
        Console.ReadLine();
    }

    /// <summary>
    /// Compiles the command-matching pattern into a separate assembly named RegexLib,
    /// where it is emitted as the public type
    /// <c>Utilities.RegularExpressions.DuplicatedString</c>.
    /// </summary>
    public static void CompileToAssembly()
    {
        // The single regex type to emit: pattern, options, type name, namespace, public visibility.
        RegexCompilationInfo[] regexInfos =
        {
            new RegexCompilationInfo(
                @"(stats|pause\s?(all|\d+(\,\d+)*)|start\s?(all|\d+(\,\d+)*)|add\s?time\s?(all|\d+(\,\d+)*)(\s\d+)|c(?:hange)?\s?p(?:asskey)?|close)(.*)",
                RegexOptions.Compiled,
                "DuplicatedString",
                "Utilities.RegularExpressions",
                true)
        };

        // Build an AssemblyTitle attribute for the generated assembly by looking up the
        // attribute's single-string constructor and binding the title text to it.
        ConstructorInfo titleCtor =
            typeof(System.Reflection.AssemblyTitleAttribute).GetConstructor(new Type[] { typeof(string) });
        CustomAttributeBuilder titleAttribute = new CustomAttributeBuilder(
            titleCtor,
            new object[] { "General-purpose library of compiled regular expressions" });
        CustomAttributeBuilder[] assemblyAttributes = { titleAttribute };

        // Identity of the assembly that will hold the compiled regex type.
        AssemblyName targetAssembly =
            new AssemblyName("RegexLib, Version=1.0.0.1001, Culture=neutral, PublicKeyToken=null");

        // Emit the assembly containing the compiled regular expression.
        Regex.CompileToAssembly(regexInfos, targetAssembly, assemblyAttributes);
    }

以下是结果:

RegexOptions.Compiled: 3908ms
CompiledToAssembly: 59349ms
Interpreted: 5653ms

2 个答案:

答案 0 :(得分:6)

您的代码有问题:静态字段初始值设定项会在静态方法运行之前运行。这意味着在Main()运行之前,level4就已经被赋值了。也就是说,level4引用的对象并不是在CompileToAssembly()中创建的类的实例。

请注意,Regex.CompileToAssembly的示例代码是在两个不同的程序中分别完成正则表达式的编译和使用的。因此,您以“CompiledToAssembly”计时的实际正则表达式,可能是您在之前的测试中编译的另一个正则表达式。

要考虑的另一个因素是:将程序集加载到内存并将其JIT编译为机器代码的开销可能相当大,以至于需要超过1,000,000次迭代才能看到好处。

答案 1 :(得分:4)

您正在调试器(Visual Studio)下运行。它将阻止在加载程序集时发生JIT优化。尝试在没有调试器的情况下运行(ctrl-f5)。