我想制作一款跨平台的小游戏,但后来遇到了不支持 JIT 的设备,例如 iPhone、Windows Mobile 和 Xbox One(指游戏端,而不是应用端)。
由于该游戏必须根据文本文件中的脚本生成一些“基本”代码,例如公式、赋值、函数调用,以及在每个对象的字典中修改/存储值(有点像混合式的互动小说游戏),这实际上无法通过 AOT 编译来完成。
我原本预期 Visual Studio 编译出的代码是最快的,而 Linq.Expression 版本最多慢 10%。
至于把函数存储起来、几乎所有操作都通过调用这些函数来完成的方法,我原以为它会比编译的代码慢很多,但令我惊讶的是,它反而更快?
这是使用Visual Studio 2017编译的
.Net Framework 4.7.2
平台目标= x86(尚未在ARM上进行测试)
分别在附加 Visual Studio 和独立运行(不附加 Visual Studio)的情况下进行了测试,性能没有明显变化。
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq.Expressions;
namespace Test
/// <summary>
/// Console entry point for the benchmark.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the benchmark and then waits for the user to press enter.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // The PerformanceTest constructor executes the whole benchmark.
        new PerformanceTest();

        Console.WriteLine("Done, press enter to exit");
        // Keep the console open until enter is pressed, as the message above promises.
        Console.ReadLine();
    }
}
/// <summary>
/// Benchmark payload: six float values stored in a dictionary under the keys "A" through "F".
/// </summary>
class TestObject
{
    // Kept as a public field (not a property): TestExpression resolves it
    // through reflection with GetField("data").
    public Dictionary<string, float> data = new Dictionary<string, float>();

    /// <summary>
    /// Fills <see cref="data"/> with values drawn from <paramref name="rnd"/>.
    /// </summary>
    /// <param name="rnd">Random source; six values are consumed, in key order A..F.</param>
    public TestObject(Random rnd)
    {
        data.Add("A", (float)rnd.NextDouble());
        data.Add("B", (float)rnd.NextDouble());
        data.Add("C", (float)rnd.NextDouble());
        // D and F are offset by 1.0f so the divisors used by the benchmark formula
        // (D and E + F) are never zero.
        data.Add("D", (float)rnd.NextDouble() + 1.0f);
        data.Add("E", (float)rnd.NextDouble());
        data.Add("F", (float)rnd.NextDouble() + 1.0f);
    }
}
/// <summary>
/// Benchmark driver. Constructing an instance builds the test data and the handlers,
/// warms the handlers up, and then times every handler family several times,
/// printing the results to the console.
/// </summary>
class PerformanceTest
{
    // Single stopwatch shared by the constructor and Loop; timings are taken as
    // differences of ElapsedMilliseconds.
    Stopwatch timer = new Stopwatch();

    /// <summary>
    /// Runs the complete benchmark.
    /// </summary>
    public PerformanceTest()
    {
        var rnd = new Random(1);
        int testSize = 5000000;
        int testTimes = 5;

        // The stopwatch must be running, otherwise every elapsed value printed below is 0.
        timer.Start();

        Console.WriteLine($"Creating {testSize} objects to test performance with");
        var data = new TestObject[testSize];
        for (int i = 0; i < data.Length; i++)
        {
            data[i] = new TestObject(rnd);
        }
        Console.WriteLine($"Created objects in {timer.ElapsedMilliseconds} milliseconds");

        int handlers = 1000;
        Console.WriteLine($"Creating {handlers} handlers per type");
        // One row per handler family: [0] TestNormal, [1] TestExpression, [2] TestOther.
        var tests = new PerfTest[3][];
        tests[0] = new PerfTest[handlers];
        tests[1] = new PerfTest[handlers];
        tests[2] = new PerfTest[handlers];
        for (int i = 0; i < tests[0].Length; i++)
        {
            tests[0][i] = new TestNormal();
        }
        for (int i = 0; i < tests[1].Length; i++)
        {
            tests[1][i] = new TestExpression();
        }
        for (int i = 0; i < tests[2].Length; i++)
        {
            tests[2][i] = new TestOther();
        }
        Console.WriteLine($"Handlers created");

        Console.WriteLine($"Warming up all handlers");
        for (int t = 0; t < tests.Length; t++)
        {
            for (int i = 0; i < tests[t].Length; i++)
            {
                // NOTE(review): the loop body was missing from the listing. Calling every
                // handler once is the presumed warm-up - confirm against the original source.
                tests[t][i].Perform(data[0]);
            }
        }

        Console.WriteLine($"Testing data {testTimes} times with handlers of each type");
        for (int i = 0; i < testTimes; i++)
        {
            for (int t = 0; t < tests.Length; t++)
            {
                Loop(tests[t], data);
            }
        }
    }

    /// <summary>
    /// Times one pass over <paramref name="data"/>, using a randomly selected handler from
    /// <paramref name="test"/> for every object, and prints the handler name, the elapsed
    /// milliseconds and the accumulated sum.
    /// </summary>
    /// <param name="test">Handlers of one family; must contain at least one element.</param>
    /// <param name="data">Objects passed to the handlers.</param>
    void Loop(PerfTest[] test, TestObject[] data)
    {
        // Fixed seed: every call walks the same handler sequence.
        var rnd = new Random(1);
        var start = timer.ElapsedMilliseconds;
        double sum = 0;
        for (int i = 0; i < data.Length; i++)
        {
            sum += test[rnd.Next(test.Length)].Perform(data[i]);
        }
        var stop = timer.ElapsedMilliseconds;
        var elapsed = stop - start;
        Console.WriteLine($"{test[0].Name}".PadRight(25) + $"{elapsed} milliseconds".PadRight(20) + $"sum = { sum}");
    }
}
/// <summary>
/// Common base type for the benchmarked handler implementations.
/// </summary>
abstract class PerfTest
{
    // Label printed next to the timing of this handler family; set by each subclass constructor.
    public string Name;

    /// <summary>
    /// Evaluates the handler for one object.
    /// </summary>
    /// <param name="obj">Object whose data dictionary is read.</param>
    /// <returns>The computed value.</returns>
    public abstract float Perform(TestObject obj);
}
/// <summary>
/// Baseline handler: the formula is written directly in C# and compiled ahead of time.
/// </summary>
class TestNormal : PerfTest
{
    public TestNormal()
    {
        Name = "\"Normal\"";
    }

    /// <summary>
    /// Computes A * B + C / D + E / (E + F) from <c>obj.data</c>.
    /// </summary>
    /// <param name="obj">Object supplying the values for keys "A" through "F".</param>
    /// <returns>The formula result.</returns>
    // "E" is deliberately looked up twice so this handler performs the same seven
    // dictionary lookups as the expression tree built in TestExpression.
    public override float Perform(TestObject obj) =>
        obj.data["A"] * obj.data["B"]
        + obj.data["C"] / obj.data["D"]
        + obj.data["E"] / (obj.data["E"] + obj.data["F"]);
}
/// <summary>
/// Handler that builds the formula as a LINQ expression tree and compiles it to a delegate.
/// </summary>
class TestExpression : PerfTest
{
    // Delegate produced by compiling the expression tree in the constructor.
    Func<TestObject, float> compiledExpression;

    /// <summary>
    /// Builds and compiles the expression A * B + (C / D + E / (E + F)).
    /// </summary>
    public TestExpression()
    {
        Name = "Compiled Expression";

        var par = Expression.Parameter(typeof(TestObject));
        var body = Expression.Add(
            Expression.Multiply(indexer(par, "A"), indexer(par, "B")),
            Expression.Add(
                Expression.Divide(indexer(par, "C"), indexer(par, "D")),
                Expression.Divide(
                    indexer(par, "E"),
                    Expression.Add(indexer(par, "E"), indexer(par, "F")))));
        var lambda = Expression.Lambda<Func<TestObject, float>>(body, par);
        compiledExpression = lambda.Compile();
    }

    /// <summary>
    /// Creates the expression <c>parameter.data[index]</c>.
    /// </summary>
    /// <param name="parameter">Expression of type TestObject.</param>
    /// <param name="index">Dictionary key to read.</param>
    /// <returns>An index expression over the dictionary's indexer property.</returns>
    static Expression indexer(Expression parameter, string index)
    {
        var property = Expression.Field(parameter, typeof(TestObject).GetField(nameof(TestObject.data)));
        // "Item" is the reflection name of the Dictionary<,> indexer property.
        return Expression.MakeIndex(property, typeof(Dictionary<string, float>).GetProperty("Item"), new[] { Expression.Constant(index) });
    }

    /// <summary>
    /// Invokes the compiled expression for <paramref name="obj"/>.
    /// </summary>
    public override float Perform(TestObject obj) => compiledExpression(obj);
}
/// <summary>
/// Handler that composes precompiled delegates: one delegate per argument fetches a value
/// from the object, and a seventh delegate combines the six values.
/// </summary>
class TestOther : PerfTest
{
    // One accessor per formula argument, in the order A..F.
    Func<TestObject, float>[] parameters;
    // Combines the six fetched values into the result.
    Func<float, float, float, float, float, float, float> func;

    public TestOther()
    {
        Name = "other";

        // this delegate will come from a collection of functions, depending on type
        func = (a, b, c, d, e, f) => a * b + c / d + e / (e + f);

        parameters = new Func<TestObject, float>[]
        {
            (o) => o.data["A"],
            (o) => o.data["B"],
            (o) => o.data["C"],
            (o) => o.data["D"],
            (o) => o.data["E"],
            (o) => o.data["F"],
        };
    }

    /// <summary>
    /// Evaluates the six accessors against <paramref name="obj"/> and passes the results
    /// to <paramref name="myfunc"/>.
    /// </summary>
    /// <param name="obj">Object the accessors read from.</param>
    /// <param name="myfunc">Function combining the six values.</param>
    /// <param name="parameters">Accessors; indices 0 through 5 are used.</param>
    /// <returns>The value returned by <paramref name="myfunc"/>.</returns>
    float call(TestObject obj, Func<float, float, float, float, float, float, float> myfunc, Func<TestObject, float>[] parameters)
    {
        return myfunc(parameters[0](obj), parameters[1](obj), parameters[2](obj), parameters[3](obj), parameters[4](obj), parameters[5](obj));
    }

    /// <summary>
    /// Evaluates the formula for <paramref name="obj"/> through the stored delegates.
    /// </summary>
    public override float Perform(TestObject obj) => call(obj, func, parameters);
}
Creating 5000000 objects to test performance with
Created objects in 7489 milliseconds
Creating 1000 handlers per type
Handlers created
Warming up all handlers
Testing data 5 times with handlers of each type
"Normal" 811 milliseconds sum = 4174863.85436047
Compiled Expression 1371 milliseconds sum = 4174863.85436047
other 746 milliseconds sum = 4174863.85436047
"Normal" 812 milliseconds sum = 4174863.85436047
Compiled Expression 1379 milliseconds sum = 4174863.85436047
other 747 milliseconds sum = 4174863.85436047
"Normal" 812 milliseconds sum = 4174863.85436047
Compiled Expression 1373 milliseconds sum = 4174863.85436047
other 747 milliseconds sum = 4174863.85436047
"Normal" 812 milliseconds sum = 4174863.85436047
Compiled Expression 1373 milliseconds sum = 4174863.85436047
other 747 milliseconds sum = 4174863.85436047
"Normal" 812 milliseconds sum = 4174863.85436047
Compiled Expression 1375 milliseconds sum = 4174863.85436047
other 746 milliseconds sum = 4174863.85436047
Done, press enter to exit
为什么 TestOther 类的 Perform 函数比 TestNormal 和 TestExpression 两者都快?
Expression 版本在 Dictionary 的 FindEntry 中消耗的 CPU 时间是其他版本的两倍以上。
Stack Weight (in view) (ms)
GameTest.exe!Test.PerformanceTest::Loop 15,243.896600
|- Anonymously Hosted DynamicMethods Assembly!dynamicClass::lambda_method 6,038.952700
|- GameTest.exe!Test.TestNormal::Perform 3,724.253300
|- GameTest.exe!Test.TestOther::call 3,493.239800
然后,我检查了生成的汇编代码。它们看起来几乎完全相同,无法解释表达式版本为何落后如此之多。我还在 Windbg 中设置了断点,检查传递给 Dictionary[x] 调用的内容是否有所不同,但一切看起来都很正常。
// Variant of the measurement loop: every iteration calls the first handler (test[0])
// instead of a randomly selected one. The original call is kept as a comment for comparison.
for (int i = 0; i < data.Length; i++)
// sum += test[rnd.Next(test.Length)].Perform(data[i]);
sum += test[0].Perform(data[i]);
Compiled Expression 740 milliseconds sum = 4174863.85440933
"Normal" 743 milliseconds sum = 4174863.85430179
other 714 milliseconds sum = 4174863.85430179
Compiled Expression 1359 milliseconds sum = 4174863.85440933
"Normal" 775 milliseconds sum = 4174863.85430179
other 771 milliseconds sum = 4174863.85430179
我是在 3.50GHz 的 Core(TM) i7-4770K CPU 上运行这段代码的。
众所周知,字典对缓存预测器非常不友好,因为它们往往在内存中无规律地四处跳转,找不到任何模式。大量的字典调用似乎已经让预测器相当混乱,再加上随机选取测试实例带来的额外随机性,以及编译表达式更复杂的分派,CPU 就无法再预测内存访问模式并把部分数据预取到 L1/L2 缓存中。实际上,您测试的并不是调用性能,而是 CPU 缓存策略的表现。
Method | N | Mean |
--------------- |----- |---------:|
TestNormal | 1000 | 3.175 us |
TestExpression | 1000 | 3.480 us |
TestOther | 1000 | 4.325 us |
/// <summary>
/// Computes A * B + C / D + E / (E + F) from <c>obj.data</c>, looking up "E" twice.
/// </summary>
/// <param name="obj">Object supplying the values for keys "A" through "F".</param>
/// <returns>The formula result.</returns>
public override float Perform(TestObject obj)
{
    return obj.data["A"] * obj.data["B"]
        + obj.data["C"] / obj.data["D"]
        + obj.data["E"] / (obj.data["E"] + obj.data["F"]);
}
/// <summary>
/// Computes A * B + C / D + E / (E + F) from <c>obj.data</c>, reading "E" only once.
/// </summary>
/// <param name="obj">Object supplying the values for keys "A" through "F".</param>
/// <returns>The formula result.</returns>
public override float Perform(TestObject obj)
{
    // Cache E in a local: this saves one of the two dictionary lookups for that key.
    var e = obj.data["E"];
    return obj.data["A"] * obj.data["B"]
        + obj.data["C"] / obj.data["D"]
        + e / (e + obj.data["F"]);
}