我使用蒙特卡罗方法编写了一段计算PI的代码,运行在一台2013年的MacBook上,CPU为1.7 GHz Intel Core i7(似乎是4650U)。当循环次数为10^8时需要2~3秒,当循环次数为10^9时需要大约25秒。
import Foundation
/// Returns a pseudo-random `Double` between `lowerBound` and `upperBound`.
///
/// The value is produced by scaling one `rand()` draw by `RAND_MAX` and
/// mapping that fraction linearly onto the requested interval.
///
/// - Parameter lowerBound: Value returned when the scaled draw is 0.
/// - Parameter upperBound: Value approached when the scaled draw is 1.
/// - Returns: `lowerBound + fraction * (upperBound - lowerBound)`.
func randomNumber(lowerBound:Double, upperBound:Double) -> Double {
    // Fraction of the generator's range covered by this draw.
    let fraction = Double(rand()) / Double(RAND_MAX)
    // Width of the target interval.
    let span = upperBound - lowerBound
    return lowerBound + fraction * span
}
// Monte Carlo estimate of pi: sample points uniformly in the square
// [-1, 1] x [-1, 1] and count how many land inside the unit circle.
// The ratio inside/total approximates (circle area) / (square area) = pi / 4.

// Total number of random points to sample.
let pointNumber = 1000000000
// Number of sampled points that fell inside (or on) the unit circle.
var pointInsideCount = 0

// Half-open range so that exactly `pointNumber` points are sampled.
// A closed range (0...pointNumber) would draw pointNumber + 1 points while
// the estimate below divides by pointNumber, biasing the result.
for _ in 0..<pointNumber {
    let x = randomNumber(-1.0, upperBound:1.0)
    let y = randomNumber(-1.0, upperBound:1.0)
    // Compare the squared distance from the origin with the squared radius.
    if x*x+y*y <= 1 {
        pointInsideCount += 1
    }
}

// Scale the hit ratio by 4 to turn the pi/4 estimate into pi.
let result = Double(pointInsideCount) / Double(pointNumber) * 4
let piString = String(format: "%.50f", result)
print("Pi is \(piString)")
我运行“ di -n randomNumber ”来获取randomNumber函数的汇编代码
swiftTest`swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double:
0x10023c160 <+0>: pushq %rbp
0x10023c161 <+1>: movq %rsp, %rbp
0x10023c164 <+4>: subq $0x20, %rsp
0x10023c168 <+8>: movsd %xmm0, -0x8(%rbp)
0x10023c16d <+13>: movsd %xmm1, -0x10(%rbp)
0x10023c172 <+18>: movsd %xmm0, -0x18(%rbp)
0x10023c177 <+23>: movsd %xmm1, -0x20(%rbp)
0x10023c17c <+28>: callq 0x10027585e ; symbol stub for: rand
0x10023c181 <+33>: movsd 0x3bc1f(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 352
0x10023c189 <+41>: cvtsi2sdl %eax, %xmm1
0x10023c18d <+45>: divsd %xmm0, %xmm1
0x10023c191 <+49>: movsd -0x20(%rbp), %xmm0
0x10023c196 <+54>: movsd -0x18(%rbp), %xmm2
0x10023c19b <+59>: subsd %xmm2, %xmm0
0x10023c19f <+63>: mulsd %xmm0, %xmm1
0x10023c1a3 <+67>: addsd %xmm1, %xmm2
0x10023c1a7 <+71>: movaps %xmm2, %xmm0
0x10023c1aa <+74>: addq $0x20, %rsp
0x10023c1ae <+78>: popq %rbp
0x10023c1af <+79>: retq
运行“ di -f ”以获取整个文件的汇编代码
swiftTest`main:
0x10023bcd0 <+0>: pushq %rbp
0x10023bcd1 <+1>: movq %rsp, %rbp
0x10023bcd4 <+4>: subq $0x120, %rsp
0x10023bcdb <+11>: leaq 0x9340e(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bce2 <+18>: leaq 0x933ff(%rip), %rcx ; static Swift.Process._argc : Swift.Int32
0x10023bce9 <+25>: movl %edi, (%rcx)
0x10023bceb <+27>: cmpq $-0x1, (%rax)
0x10023bcf2 <+34>: movq %rsi, -0x60(%rbp)
0x10023bcf6 <+38>: je 0x10023bd0e ; <+62> at main.swift
0x10023bcf8 <+40>: leaq 0x933f1(%rip), %rdi ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bcff <+47>: leaq -0x99d56(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func6
0x10023bd06 <+54>: movq %rax, %rsi
0x10023bd09 <+57>: callq 0x100266870 ; swift_once
0x10023bd0e <+62>: leaq 0x933e3(%rip), %rax ; static Swift.Process._unsafeArgv : Swift.UnsafeMutablePointer<Swift.UnsafeMutablePointer<Swift.Int8>>
0x10023bd15 <+69>: movq -0x60(%rbp), %rcx
0x10023bd19 <+73>: movq %rcx, (%rax)
0x10023bd1c <+76>: movq $0x989680, 0x93499(%rip) ; lazy cache variable for type metadata for Swift.VaListBuilder + 4
0x10023bd27 <+87>: movq $0x0, 0x93496(%rip) ; swiftTest.pointNumber : Swift.Int + 4
0x10023bd32 <+98>: movq 0x93487(%rip), %rax ; swiftTest.pointNumber : Swift.Int
0x10023bd39 <+105>: movq %rax, -0x68(%rbp)
0x10023bd3d <+109>: xorl %eax, %eax
0x10023bd3f <+111>: movl %eax, %ecx
0x10023bd41 <+113>: movq -0x68(%rbp), %rdx
0x10023bd45 <+117>: cmpq %rdx, %rcx
0x10023bd48 <+120>: setle %sil
0x10023bd4c <+124>: testb $0x1, %sil
0x10023bd50 <+128>: jne 0x10023bd54 ; <+132> at main.swift:17
0x10023bd52 <+130>: jmp 0x10023bdb3 ; <+227> at main.swift:17
0x10023bd54 <+132>: movq -0x68(%rbp), %rax
0x10023bd58 <+136>: incq %rax
0x10023bd5b <+139>: seto %cl
0x10023bd5e <+142>: movq -0x68(%rbp), %rdx
0x10023bd62 <+146>: cmpq %rdx, %rax
0x10023bd65 <+149>: setg %sil
0x10023bd69 <+153>: testb $0x1, %sil
0x10023bd6d <+157>: movb %cl, -0x69(%rbp)
0x10023bd70 <+160>: jne 0x10023bd74 ; <+164> at main.swift:17
0x10023bd72 <+162>: jmp 0x10023bd87 ; <+183> at main.swift:17
0x10023bd74 <+164>: movq -0x68(%rbp), %rax
0x10023bd78 <+168>: incq %rax
0x10023bd7b <+171>: seto %cl
0x10023bd7e <+174>: movq %rax, -0x78(%rbp)
0x10023bd82 <+178>: movb %cl, -0x79(%rbp)
0x10023bd85 <+181>: jmp 0x10023bddf ; <+271> at main.swift:17
0x10023bd87 <+183>: leaq 0x418a2(%rip), %rdi ; "fatal error"
0x10023bd8e <+190>: movl $0xb, %eax
0x10023bd93 <+195>: movl %eax, %esi
0x10023bd95 <+197>: movl $0x2, %eax
0x10023bd9a <+202>: leaq 0x487af(%rip), %rcx ; "Range end index has no valid successor"
0x10023bda1 <+209>: movl $0x26, %edx
0x10023bda6 <+214>: movl %edx, %r8d
0x10023bda9 <+217>: movl %eax, %edx
0x10023bdab <+219>: movl %eax, %r9d
0x10023bdae <+222>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bdb3 <+227>: leaq 0x41876(%rip), %rdi ; "fatal error"
0x10023bdba <+234>: movl $0xb, %eax
0x10023bdbf <+239>: movl %eax, %esi
0x10023bdc1 <+241>: movl $0x2, %eax
0x10023bdc6 <+246>: leaq 0x48753(%rip), %rcx ; "Can't form Range with end < start"
0x10023bdcd <+253>: movl $0x21, %edx
0x10023bdd2 <+258>: movl %edx, %r8d
0x10023bdd5 <+261>: movl %eax, %edx
0x10023bdd7 <+263>: movl %eax, %r9d
0x10023bdda <+266>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bddf <+271>: leaq -0x30(%rbp), %rdi
0x10023bde3 <+275>: leaq -0x20(%rbp), %rsi
0x10023bde7 <+279>: movq $0x0, -0x20(%rbp)
0x10023bdef <+287>: movq -0x78(%rbp), %rax
0x10023bdf3 <+291>: movq %rax, -0x18(%rbp)
0x10023bdf7 <+295>: callq 0x1000362e0 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.Range.generate <A where A: Swift.ForwardIndexType> (Swift.Range<A>)() -> Swift.RangeGenerator<A>
0x10023bdfc <+300>: movq -0x30(%rbp), %rax
0x10023be00 <+304>: movq -0x28(%rbp), %rsi
0x10023be04 <+308>: movq %rax, -0x10(%rbp)
0x10023be08 <+312>: movq %rsi, -0x8(%rbp)
0x10023be0c <+316>: leaq -0x40(%rbp), %rdi
0x10023be10 <+320>: leaq -0x10(%rbp), %rsi
0x10023be14 <+324>: callq 0x100036960 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.RangeGenerator.next <A where A: Swift.ForwardIndexType> (inout Swift.RangeGenerator<A>)() -> Swift.Optional<A>
0x10023be19 <+329>: movq -0x40(%rbp), %rsi
0x10023be1d <+333>: movb -0x38(%rbp), %al
0x10023be20 <+336>: xorb $0x1, %al
0x10023be22 <+338>: testb $0x1, %al
0x10023be24 <+340>: movq %rsi, -0x88(%rbp)
0x10023be2b <+347>: jne 0x10023be32 ; <+354> at main.swift:17
0x10023be2d <+349>: jmp 0x10023bed4 ; <+516> at main.swift:23
0x10023be32 <+354>: movsd 0x3bf66(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be3a <+362>: movsd 0x3bf56(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be42 <+370>: movq -0x88(%rbp), %rax
0x10023be49 <+377>: movq %rax, -0x48(%rbp)
0x10023be4d <+381>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be52 <+386>: movsd 0x3bf46(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be5a <+394>: movsd 0x3bf36(%rip), %xmm2 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be62 <+402>: movsd %xmm0, -0x50(%rbp)
0x10023be67 <+407>: movsd %xmm0, -0x90(%rbp)
0x10023be6f <+415>: movaps %xmm1, %xmm0
0x10023be72 <+418>: movaps %xmm2, %xmm1
0x10023be75 <+421>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be7a <+426>: movsd 0x3bf16(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be82 <+434>: movsd %xmm0, -0x58(%rbp)
0x10023be87 <+439>: movsd -0x90(%rbp), %xmm2
0x10023be8f <+447>: mulsd %xmm2, %xmm2
0x10023be93 <+451>: mulsd %xmm0, %xmm0
0x10023be97 <+455>: addsd %xmm0, %xmm2
0x10023be9b <+459>: ucomisd %xmm2, %xmm1
0x10023be9f <+463>: jb 0x10023becf ; <+511> at main.swift:23
0x10023bea1 <+465>: movq 0x93320(%rip), %rax ; swiftTest.pointInsideCount : Swift.Int
0x10023bea8 <+472>: incq %rax
0x10023beab <+475>: seto %cl
0x10023beae <+478>: movq %rax, -0x98(%rbp)
0x10023beb5 <+485>: movb %cl, -0x99(%rbp)
0x10023bebb <+491>: jo 0x10023c155 ; <+1157> at main.swift:21
0x10023bec1 <+497>: movq -0x98(%rbp), %rax
0x10023bec8 <+504>: movq %rax, 0x932f9(%rip) ; swiftTest.pointInsideCount : Swift.Int
0x10023becf <+511>: jmp 0x10023be0c ; <+316> at main.swift:17
0x10023bed4 <+516>: movsd 0x3beb4(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 328
0x10023bedc <+524>: cvtsi2sdq 0x932e3(%rip), %xmm1 ; swiftTest.pointInsideCount : Swift.Int
0x10023bee5 <+533>: cvtsi2sdq 0x932d2(%rip), %xmm2 ; swiftTest.pointNumber : Swift.Int
0x10023beee <+542>: divsd %xmm2, %xmm1
0x10023bef2 <+546>: mulsd %xmm0, %xmm1
0x10023bef6 <+550>: movsd %xmm1, 0x932d2(%rip) ; swiftTest.result : Swift.Double
0x10023befe <+558>: callq 0x10023c1b0 ; type metadata accessor for Swift.CVarArgType
0x10023bf03 <+563>: movl $0x1, %ecx
0x10023bf08 <+568>: movl %ecx, %edi
0x10023bf0a <+570>: movq %rax, %rsi
0x10023bf0d <+573>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bf12 <+578>: leaq 0x4865e(%rip), %rdi ; "%.50f"
0x10023bf19 <+585>: movl $0x5, %ecx
0x10023bf1e <+590>: movl %ecx, %esi
0x10023bf20 <+592>: movl $0x1, %ecx
0x10023bf25 <+597>: movq %rdx, -0xa8(%rbp)
0x10023bf2c <+604>: movl %ecx, %edx
0x10023bf2e <+606>: movq %rax, -0xb0(%rbp)
0x10023bf35 <+613>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023bf3a <+618>: leaq 0x667b7(%rip), %rsi ; protocol witness table for Swift.Double : Swift.CVarArgType in Swift
0x10023bf41 <+625>: leaq 0x6a258(%rip), %rdi ; direct type metadata for Swift.Double
0x10023bf48 <+632>: addq $0x8, %rdi
0x10023bf4f <+639>: movq -0xa8(%rbp), %r8
0x10023bf56 <+646>: movq %rdi, 0x18(%r8)
0x10023bf5a <+650>: movq %rsi, 0x20(%r8)
0x10023bf5e <+654>: movsd 0x9326a(%rip), %xmm0 ; swiftTest.result : Swift.Double
0x10023bf66 <+662>: movsd %xmm0, (%r8)
0x10023bf6b <+667>: movq %rax, %rdi
0x10023bf6e <+670>: movq %rdx, %rsi
0x10023bf71 <+673>: movq %rcx, %rdx
0x10023bf74 <+676>: movq -0xb0(%rbp), %rcx
0x10023bf7b <+683>: callq 0x10002dfa0 ; ext.Foundation.Swift.String.init (Swift.String.Type)(format : Swift.String, Swift.Array<Swift.CVarArgType>...) -> Swift.String
0x10023bf80 <+688>: movq %rax, 0x93251(%rip) ; swiftTest.piString : Swift.String
0x10023bf87 <+695>: movq %rdx, 0x93252(%rip) ; swiftTest.piString : Swift.String + 8
0x10023bf8e <+702>: movq %rcx, 0x93253(%rip) ; swiftTest.piString : Swift.String + 16
-> 0x10023bf95 <+709>: callq 0x10023c200 ; type metadata accessor for protocol<>
0x10023bf9a <+714>: movl $0x1, %r9d
0x10023bfa0 <+720>: movl %r9d, %edi
0x10023bfa3 <+723>: movq %rax, %rsi
0x10023bfa6 <+726>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfab <+731>: movl $0x3, %r9d
0x10023bfb1 <+737>: movl %r9d, %edi
0x10023bfb4 <+740>: leaq 0x6fe25(%rip), %rcx ; direct type metadata for Swift.String
0x10023bfbb <+747>: addq $0x8, %rcx
0x10023bfc2 <+754>: movq %rcx, 0x18(%rdx)
0x10023bfc6 <+758>: movq %rcx, %rsi
0x10023bfc9 <+761>: movq %rax, -0xb8(%rbp)
0x10023bfd0 <+768>: movq %rdx, -0xc0(%rbp)
0x10023bfd7 <+775>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfdc <+780>: leaq 0x4859a(%rip), %rdi ; "Pi is "
0x10023bfe3 <+787>: movl $0x6, %r9d
0x10023bfe9 <+793>: movl %r9d, %esi
0x10023bfec <+796>: movl $0x1, %r9d
0x10023bff2 <+802>: movq %rdx, -0xc8(%rbp)
0x10023bff9 <+809>: movl %r9d, %edx
0x10023bffc <+812>: movq %rax, -0xd0(%rbp)
0x10023c003 <+819>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c008 <+824>: movq %rax, %rdi
0x10023c00b <+827>: movq %rdx, %rsi
0x10023c00e <+830>: movq %rcx, %rdx
0x10023c011 <+833>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c016 <+838>: movq -0xc8(%rbp), %rsi
0x10023c01d <+845>: movq %rax, (%rsi)
0x10023c020 <+848>: movq %rdx, 0x8(%rsi)
0x10023c024 <+852>: movq %rcx, 0x10(%rsi)
0x10023c028 <+856>: movq 0x931a9(%rip), %rdi ; swiftTest.piString : Swift.String
0x10023c02f <+863>: movq 0x931aa(%rip), %rsi ; swiftTest.piString : Swift.String + 8
0x10023c036 <+870>: movq 0x931ab(%rip), %rax ; swiftTest.piString : Swift.String + 16
0x10023c03d <+877>: movq %rdi, -0xd8(%rbp)
0x10023c044 <+884>: movq %rax, %rdi
0x10023c047 <+887>: movq %rsi, -0xe0(%rbp)
0x10023c04e <+894>: movq %rax, -0xe8(%rbp)
0x10023c055 <+901>: callq 0x100268160 ; swift_unknownRetain
0x10023c05a <+906>: movq -0xd8(%rbp), %rdi
0x10023c061 <+913>: movq -0xe0(%rbp), %rsi
0x10023c068 <+920>: movq -0xe8(%rbp), %rdx
0x10023c06f <+927>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c074 <+932>: leaq 0x40d15(%rip), %rdi ; ""
0x10023c07b <+939>: xorl %r9d, %r9d
0x10023c07e <+942>: movl %r9d, %esi
0x10023c081 <+945>: movl $0x1, %r9d
0x10023c087 <+951>: movq -0xc8(%rbp), %r8
0x10023c08e <+958>: movq %rax, 0x18(%r8)
0x10023c092 <+962>: movq %rdx, 0x20(%r8)
0x10023c096 <+966>: movq %rcx, 0x28(%r8)
0x10023c09a <+970>: movl %r9d, %edx
0x10023c09d <+973>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c0a2 <+978>: movq %rax, %rdi
0x10023c0a5 <+981>: movq %rdx, %rsi
0x10023c0a8 <+984>: movq %rcx, %rdx
0x10023c0ab <+987>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c0b0 <+992>: movq -0xc8(%rbp), %rsi
0x10023c0b7 <+999>: movq %rax, 0x30(%rsi)
0x10023c0bb <+1003>: movq %rdx, 0x38(%rsi)
0x10023c0bf <+1007>: movq %rcx, 0x40(%rsi)
0x10023c0c3 <+1011>: movq -0xd0(%rbp), %rdi
0x10023c0ca <+1018>: callq 0x1000470c0 ; Swift.String.init (Swift.String.Type)(stringInterpolation : Swift.Array<Swift.String>...) -> Swift.String
0x10023c0cf <+1023>: movq -0xc0(%rbp), %rsi
0x10023c0d6 <+1030>: movq %rax, (%rsi)
0x10023c0d9 <+1033>: movq %rdx, 0x8(%rsi)
0x10023c0dd <+1037>: movq %rcx, 0x10(%rsi)
0x10023c0e1 <+1041>: callq 0x10012aa70 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 1)
0x10023c0e6 <+1046>: movq %rax, -0xf0(%rbp)
0x10023c0ed <+1053>: movq %rdx, -0xf8(%rbp)
0x10023c0f4 <+1060>: movq %rcx, -0x100(%rbp)
0x10023c0fb <+1067>: callq 0x10012aa90 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 2)
0x10023c100 <+1072>: movq -0xb8(%rbp), %rdi
0x10023c107 <+1079>: movq -0xf0(%rbp), %rsi
0x10023c10e <+1086>: movq -0xf8(%rbp), %r8
0x10023c115 <+1093>: movq %rdx, -0x108(%rbp)
0x10023c11c <+1100>: movq %r8, %rdx
0x10023c11f <+1103>: movq -0x100(%rbp), %r10
0x10023c126 <+1110>: movq %rcx, -0x110(%rbp)
0x10023c12d <+1117>: movq %r10, %rcx
0x10023c130 <+1120>: movq %rax, %r8
0x10023c133 <+1123>: movq -0x108(%rbp), %r9
0x10023c13a <+1130>: movq -0x110(%rbp), %rax
0x10023c141 <+1137>: movq %rax, (%rsp)
0x10023c145 <+1141>: callq 0x10012aab0 ; Swift.print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()
0x10023c14a <+1146>: xorl %eax, %eax
0x10023c14c <+1148>: addq $0x120, %rsp
0x10023c153 <+1155>: popq %rbp
0x10023c154 <+1156>: retq
0x10023c155 <+1157>: ud2
0x10023c157 <+1159>: nopw (%rax,%rax)
我可以估算时间消耗如下吗?
randomNumber 函数由大约20条指令组成,因此x和y的计算由大约40条指令组成。添加pointInsideCount只执行几条指令,因此在for循环中大约有4~5打指令(假设它是50)。可以忽略for循环之外的时间消耗。
如果我假设4650U平均每个时钟周期执行2条指令,那么当循环次数为10^8时,总时间消耗约为 50 * 10^8 / (1.7 * 10^9 * 2) ≈ 1.5秒
答案 0 :(得分:1)
您不能假设所有循环都有相同的IPC(每周期指令数)。当然,有的循环能达到2 IPC,但这并不能告诉你其他循环的任何信息。您必须仔细分析代码,才能找出瓶颈和并行度。
如果您可以安全地假设没有缓存未命中或分支预测错误,您可以使用英特尔的静态代码分析器IACA,针对特定的英特尔微体系结构,获得小循环的合理周期数估计值。它远不是对真实硬件的完全周期精确的模拟,但它确实有自己的模型,用于把uop分配到各个执行端口。它通常能得出合理的数字。
您也可以使用Agner Fog's指令表和微体系结构指南手动进行相同类型的分析(包括IACA不了解的CPU)。
当一个循环的瓶颈在于循环携带的依赖链的延迟,或者只是使某一个执行端口饱和时,这种估计通常相当准确。
在高吞吐量的情况下,有许多微妙的影响会使您期望每个时钟周期能运行4个融合域uop的代码达不到这个速度。前端只能在非常小的循环(约28或56个uop)上维持这一吞吐量,因为即使是uop缓存的吞吐量也有限:uop缓存行存在边界,而且uop并不总是恰好4个一组。
“Significant FMA performance anomaly experienced in the Intel Broadwell processor”这个问题就是一个很好的例子,说明事情可以变得多么难以理解。您会期望那段代码能使所有三个矢量执行端口饱和;它在Haswell上做到了,在Skylake上几乎做到了,但在Broadwell上却远远没有做到。而且这甚至不是前端瓶颈,因为该循环足够小,可以放进循环缓冲区。
如果这一切听起来真的很难、很复杂,那是因为它确实如此。这就是基准测试比静态分析更有用的原因。但是,微基准测试确实很容易出错。您应该查看生成的汇编代码,以确保您没有搞砸,没有让编译器把您想要测试的东西优化掉。您还需要了解很多有关CPU工作原理的知识才能避开陷阱,例如在微基准测试中放入了其他东西,结果运行时间被它占据,而不是被您真正想测试的东西占据。