我目前有两种检查数字是否为素数的方法,以及另一种计算时间的方法。
IsPrime1:
bool IsPrime1(int i)
{
if (i == 2 || i == 3 || i == 5 || i == 7) return true;
return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
}
IsPrime2:
bool IsPrime2(int i)
{
if (i == 2 || i == 3 || i == 5 || i == 7) return true;
if (i % 2 == 0) return false;
if (i % 3 == 0) return false;
if (i % 5 == 0) return false;
return i % 7 != 0;
}
CheckForTicks:
string CheckForTicks(int ticks)
{
var sw1 = Stopwatch.StartNew();
for (var g = 0; g < ticks; g++)
{
var b = IsPrime1(g);
}
sw1.Stop();
var sw2 = Stopwatch.StartNew();
for (var g = 0; g < ticks; g++)
{
var b = IsPrime2(g);
}
sw2.Stop();
return $"{ticks} ticks: IsPrime1: {sw1.ElapsedMilliseconds} ms / IsPrime2: {sw2.ElapsedMilliseconds} ms";
//equal to the following:
//return string.Format("{0} ticks: IsPrime1: {1} ms / IsPrime2: {2} ms", ticks, sw1.ElapsedMilliseconds, sw2.ElapsedMilliseconds);
}
结果:
| CheckForTicks | IsPrime1 (in ms) | IsPrime2 (in ms) |
|---------------|------------------|------------------|
| 100000 | 3 | 4 |
| 500000 | 18 | 21 |
| 1000000 | 37 | 45 |
| 5000000 | 221 | 242 |
| 10000000 | 402 | 499 |
| 50000000 | 2212 | 2320 |
| 100000000 | 4377 | 4676 |
| 500000000 | 22125 | 23786 |
我想知道的是,为什么IsPrime2
甚至比IsPrime1
稍慢
从我的观点来看,IsPrime2
应该比IsPrime1
快得多,因为它只需要在第一个return
和IsPrime1
检查所有可能性之前检查一次。
有什么我不知道的或与.NET
有关吗?
如果有人能向我解释原因,我将非常感激。
提前致谢!
PS:我正在使用Visual Studio 2015 RC
和.NET 4.6
并以Debug
模式运行。
答案 0 :(得分:9)
让我们比较一下IL代码:
IsPrime1
.method private hidebysig instance bool IsPrime1(int32 i) cil managed
{
// Code size 45 (0x2d)
.maxstack 8
IL_0000: ldarg.1
IL_0001: ldc.i4.2
IL_0002: beq.s IL_0010
IL_0004: ldarg.1
IL_0005: ldc.i4.3
IL_0006: beq.s IL_0010
IL_0008: ldarg.1
IL_0009: ldc.i4.5
IL_000a: beq.s IL_0010
IL_000c: ldarg.1
IL_000d: ldc.i4.7
IL_000e: bne.un.s IL_0012
IL_0010: ldc.i4.1
IL_0011: ret
IL_0012: ldarg.1
IL_0013: ldc.i4.2
IL_0014: rem
IL_0015: brfalse.s IL_002b
IL_0017: ldarg.1
IL_0018: ldc.i4.3
IL_0019: rem
IL_001a: brfalse.s IL_002b
IL_001c: ldarg.1
IL_001d: ldc.i4.5
IL_001e: rem
IL_001f: brfalse.s IL_002b
IL_0021: ldarg.1
IL_0022: ldc.i4.7
IL_0023: rem
IL_0024: ldc.i4.0
IL_0025: ceq
IL_0027: ldc.i4.0
IL_0028: ceq
IL_002a: ret
IL_002b: ldc.i4.0
IL_002c: ret
} // end of method Program::IsPrime1
IsPrime2
.method private hidebysig instance bool IsPrime2(int32 i) cil managed
{
// Code size 49 (0x31)
.maxstack 8
IL_0000: ldarg.1
IL_0001: ldc.i4.2
IL_0002: beq.s IL_0010
IL_0004: ldarg.1
IL_0005: ldc.i4.3
IL_0006: beq.s IL_0010
IL_0008: ldarg.1
IL_0009: ldc.i4.5
IL_000a: beq.s IL_0010
IL_000c: ldarg.1
IL_000d: ldc.i4.7
IL_000e: bne.un.s IL_0012
IL_0010: ldc.i4.1
IL_0011: ret
IL_0012: ldarg.1
IL_0013: ldc.i4.2
IL_0014: rem
IL_0015: brtrue.s IL_0019
IL_0017: ldc.i4.0
IL_0018: ret
IL_0019: ldarg.1
IL_001a: ldc.i4.3
IL_001b: rem
IL_001c: brtrue.s IL_0020
IL_001e: ldc.i4.0
IL_001f: ret
IL_0020: ldarg.1
IL_0021: ldc.i4.5
IL_0022: rem
IL_0023: brtrue.s IL_0027
IL_0025: ldc.i4.0
IL_0026: ret
IL_0027: ldarg.1
IL_0028: ldc.i4.7
IL_0029: rem
IL_002a: ldc.i4.0
IL_002b: ceq
IL_002d: ldc.i4.0
IL_002e: ceq
IL_0030: ret
} // end of method Program::IsPrime2
第一部分对于两者都是相同的:
.maxstack 8
IL_0000: ldarg.1
IL_0001: ldc.i4.2
IL_0002: beq.s IL_0010
IL_0004: ldarg.1
IL_0005: ldc.i4.3
IL_0006: beq.s IL_0010
IL_0008: ldarg.1
IL_0009: ldc.i4.5
IL_000a: beq.s IL_0010
IL_000c: ldarg.1
IL_000d: ldc.i4.7
IL_000e: bne.un.s IL_0012
IL_0010: ldc.i4.1
IL_0011: ret
毫不奇怪,这匹配:
if (i == 2 || i == 3 || i == 5 || i == 7) return true;
其余代码是等效的,但编译器为IsPrime1
生成了更短的代码。
IL_0012: ldarg.1 // Push i
IL_0013: ldc.i4.2 // Push 2
IL_0014: rem // Pop these and push i % 2
IL_0015: brfalse.s IL_002b // Go to IL_002b if the result is 0
... // Repeat the same pattern for 3, 5 and 7
IL_002b: ldc.i4.0 // Push 0 (false)
IL_002c: ret // Return
这里是IsPrime2
中的相同部分:
IL_0012: ldarg.1 // Push i
IL_0013: ldc.i4.2 // Push 2
IL_0014: rem // Pop these and push i % 2
IL_0015: brtrue.s IL_0019 // Go to IL_0019 if the result is not 0
IL_0017: ldc.i4.0 // Else load 0 (false)
IL_0018: ret // ... and return
IL_0019: ... // Here's the next condition
...
正如您所看到的,return false
代码在IsPrime2
中重复了几次,但在IsPrime1
的情况下会被考虑在内。更短的代码意味着更少的加载和处理指令,这反过来意味着更少的开销和更少的处理时间。
现在,JIT怎么样?它是否优化了这一点?
IsPrime1
x86
return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
00000022 mov eax,esi
00000024 and eax,80000001h
00000029 jns 00000030
0000002b dec eax
0000002c or eax,0FFFFFFFEh
0000002f inc eax
00000030 test eax,eax
00000032 je 00000061
00000034 mov eax,esi
00000036 mov ecx,3
0000003b cdq
0000003c idiv eax,ecx
0000003e test edx,edx
00000040 je 00000061
00000042 mov eax,esi
00000044 lea ecx,[ecx+2]
00000047 cdq
00000048 idiv eax,ecx
0000004a test edx,edx
0000004c je 00000061
0000004e lea ecx,[ecx+2]
00000051 mov eax,esi
00000053 cdq
00000054 idiv eax,ecx
00000056 test edx,edx
00000058 setne al
0000005b movzx eax,al
0000005e pop esi
0000005f pop ebp
00000060 ret
00000061 xor eax,eax
00000063 pop esi
00000064 pop ebp
00000065 ret
IsPrime2
x86
if (i % 2 == 0) return false;
00000021 mov eax,esi
00000023 and eax,80000001h
00000028 jns 0000002F
0000002a dec eax
0000002b or eax,0FFFFFFFEh
0000002e inc eax
0000002f test eax,eax
00000031 jne 00000037
00000033 xor eax,eax
00000035 jmp 0000006D
if (i % 3 == 0) return false;
00000037 mov eax,esi
00000039 mov ecx,3
0000003e cdq
0000003f idiv eax,ecx
00000041 test edx,edx
00000043 jne 00000049
00000045 xor eax,eax
00000047 jmp 0000006D
if (i % 5 == 0) return false;
00000049 mov eax,esi
0000004b mov ecx,5
00000050 cdq
00000051 idiv eax,ecx
00000053 test edx,edx
00000055 jne 0000005B
00000057 xor eax,eax
00000059 jmp 0000006D
return i % 7 != 0;
0000005b mov ecx,7
00000060 mov eax,esi
00000062 cdq
00000063 idiv eax,ecx
00000065 test edx,edx
00000067 setne al
0000006a movzx eax,al
0000006d and eax,0FFh
00000072 pop esi
00000073 pop ebp
00000074 ret
答案是......在IsPrime2
的情况下,本机代码仍然更长。例如,jne 00000037
跳转到第二个测试,jne 00000049
跳转到第三个测试等。在IsPrime1
的情况下,每个分支指向00000061
,基本上是return false;
{1}}。
这里是x64代码供参考:
IsPrime1
x64
return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
0000001f mov eax,r8d
00000022 cdq
00000023 and eax,1
00000026 xor eax,edx
00000028 sub eax,edx
0000002a test eax,eax
0000002c je 000000000000008B
0000002e mov eax,55555556h
00000033 imul r8d
00000036 mov eax,edx
00000038 shr eax,1Fh
0000003b add edx,eax
0000003d lea eax,[rdx+rdx*2]
00000040 mov ecx,r8d
00000043 sub ecx,eax
00000045 test ecx,ecx
00000047 je 000000000000008B
00000049 mov eax,66666667h
0000004e imul r8d
00000051 sar edx,1
00000053 mov eax,edx
00000055 shr eax,1Fh
00000058 add edx,eax
0000005a lea eax,[rdx+rdx*4]
0000005d mov ecx,r8d
00000060 sub ecx,eax
00000062 test ecx,ecx
00000064 je 000000000000008B
00000066 mov eax,92492493h
0000006b imul r8d
0000006e add edx,r8d
00000071 sar edx,2
00000074 mov eax,edx
00000076 shr eax,1Fh
00000079 add edx,eax
0000007b imul edx,edx,7
0000007e sub r8d,edx
00000081 xor eax,eax
00000083 test r8d,r8d
00000086 setne al
00000089 jmp 0000000000000092
0000008b xor eax,eax
0000008d jmp 0000000000000092
0000008f nop
if (i == 2 || i == 3 || i == 5 || i == 7) return true;
00000090 mov al,1
00000092 rep ret
IsPrime2
x64
if (i % 2 == 0) return false;
00000027 mov eax,r8d
0000002a cdq
0000002b and eax,1
0000002e xor eax,edx
00000030 sub eax,edx
00000032 test eax,eax
00000034 jne 000000000000003A
00000036 xor eax,eax
00000038 jmp 00000000000000A2
if (i % 3 == 0) return false;
0000003a mov eax,55555556h
0000003f imul r8d
00000042 mov eax,edx
00000044 shr eax,1Fh
00000047 add edx,eax
00000049 lea eax,[rdx+rdx*2]
0000004c mov ecx,r8d
0000004f sub ecx,eax
00000051 test ecx,ecx
00000053 jne 0000000000000059
00000055 xor al,al
00000057 jmp 00000000000000A2
if (i % 5 == 0) return false;
00000059 mov eax,66666667h
0000005e imul r8d
00000061 sar edx,1
00000063 mov eax,edx
00000065 shr eax,1Fh
00000068 add edx,eax
0000006a lea eax,[rdx+rdx*4]
0000006d mov ecx,r8d
00000070 sub ecx,eax
00000072 test ecx,ecx
00000074 jne 000000000000007A
00000076 xor al,al
00000078 jmp 00000000000000A2
return i % 7 != 0;
0000007a mov eax,92492493h
0000007f imul r8d
00000082 add edx,r8d
00000085 sar edx,2
00000088 mov eax,edx
0000008a shr eax,1Fh
0000008d add edx,eax
0000008f imul edx,edx,7
00000092 sub r8d,edx
00000095 xor eax,eax
00000097 test r8d,r8d
0000009a setne al
0000009d jmp 00000000000000A2
0000009f nop
if (i == 2 || i == 3 || i == 5 || i == 7) return true;
000000a0 mov al,1
000000a2 rep ret
这里的结论相同。 jne 0000000000000059
跳转到第二个测试,jne 000000000000007A
跳转到第三个测试等,而在IsPrime1
中,所有分支都指向000000000000008B
,这是return false;
。请注意,两个版本之间的指令计数差异在x64上较低。
哦,你应该另外注意branch prediction如何工作,以及CPU如何估计即将到来的分支可能或不太可能
答案 1 :(得分:1)
它有点深,但它们不能编译到同一个MSIL。在功能上它们是等价的,但是核心Prime1在编译时只有一个分支,所以它是A或B.感谢短路,只要它的错误评估就会停止。
Prime2仍然只测试直到遇到错误,但它会编译成4个分支而不是1个。
虽然性能存在显着差异,但在大多数情况下,我认为您希望设计该方法以获得更易读的方法(无论您认为是哪种方式)。
答案 2 :(得分:0)
他们不生成相同的代码:==
不是!=
(虽然性能应该相同),如果不使用{{1} }。每个if操作在执行之后执行清理(在操作之后将0加载到寄存器)和&amp;&amp;没有在评估之间进行任何清理。
如果版本:
&&
&安培;&安培;版本:
ldarg.0
ldc.i4.2
rem
brtrue.s IL_0020 // jumps to ret
ldc.i4.0
ret
// continues for 3, 5 and 7