swapAt()
比(nums[i], nums[j]) = (nums[j], nums[i])
操作快,我想知道为什么吗?
手动交换解决方案需要44毫秒,但是swapAt
只需40毫秒。我想知道有什么区别吗?
手动交换解决方案(44毫秒)
func moveZeroes(_ nums: inout [Int]) {
var j = 0;
for i in 0..<nums.count {
if (nums[i] != 0) {
(nums[i], nums[j]) = (nums[j], nums[i])
j += 1
}
}
}
swapAt解决方案(40毫秒)
func moveZeroes(_ nums: inout [Int]) {
var j = 0;
for i in 0..<nums.count {
if (nums[i] != 0) {
nums.swapAt(i, j)
j += 1
}
}
}
答案 0 :(得分:2)
首先,关于基准测试的一些一般性观察:
第二,我应该说元组和swapAt
表示形式之间的差异很小。我遍历了十亿次(排列1000个项目,重复一百万次),而两者之间的差异仍然只有几分之一秒。
第三,我发现在处理大型随机数据集(其中10%的数据点为零)时,使用元组进行交换比使用swapAt
稍快,但是在很少交换的情况下,使用元组进行交换要慢一些真的需要。
后一点很有意义,因为swapAt
will skip the swap operation if it’s not necessary:“以与swapAt(_:_:)
和i
相同的索引调用j
无效。” { {3}} swapAt
早于i
和j
退出是相同的:
@inlinable
public mutating func swapAt(_ i: Index, _ j: Index) {
guard i != j else { return }
let tmp = self[i]
self[i] = self[j]
self[j] = tmp
}
如果您修改元组例程以进行相同的检查,则差异会减小:
func moveZeroesTuple(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
(nums[i], nums[j]) = (nums[j], nums[i])
}
j += 1
}
}
坦率地说,基于元组的方法是如此之快,我并不感到惊讶,但是在组装过程中,优化器在将其精简为相当精简的东西方面做得很好:
0x100b5fb70 <+192>: movq 0x20(%rbx,%rdx,8), %rsi ; retrieve nums[i]
0x100b5fb75 <+197>: testq %rsi, %rsi ; is it zero?
0x100b5fb78 <+200>: je 0x100b5fb98 ; <+232> [inlined] protocol witness for Swift.Strideable.advanced(by: A.Stride) -> A in conformance Swift.Int : Swift.Strideable in Swift
0x100b5fb7a <+202>: cmpq %rcx, %rdx ; is i = j?
0x100b5fb7d <+205>: je 0x100b5fb93 ; <+227> [inlined] MyAppTests.MyAppTests.moveZeroesTuple(inout Swift.Array<Swift.Int>) -> () + 66 at MyAppTests.swift:120
0x100b5fb7f <+207>: cmpq (%rax), %rcx ; are we done?
0x100b5fb82 <+210>: jae 0x100b5fbc7 ; <+279> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A
0x100b5fb84 <+212>: movq 0x20(%rbx,%rcx,8), %rdi ; retrieve nums[j]
0x100b5fb89 <+217>: movq %rdi, 0x20(%rbx,%rdx,8) ; save it in nums[i]
0x100b5fb8e <+222>: movq %rsi, 0x20(%rbx,%rcx,8) ; save previously retrieved nums[i] in nums[j]
0x100b5fb93 <+227>: incq %rcx ; j += 1
0x100b5fb96 <+230>: jo 0x100b5fbc5 ; <+277> [inlined] MyAppTests.MyAppTests.moveZeroesTuple(inout Swift.Array<Swift.Int>) -> () at MyAppTests.swift:120
0x100b5fb98 <+232>: incq %rdx ; i += 1
0x100b5fb9b <+235>: cmpq %rdx, %r12 ; repeat for loop
0x100b5fb9e <+238>: jne 0x100b5fb70 ; <+192> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 15
经过优化的swapAt
程序集几乎相同(由于有内联功能),但是其中只包含了几条指令:
0x100b5d920 <+192>: movq 0x20(%rbx,%rdx,8), %rsi
0x100b5d925 <+197>: testq %rsi, %rsi
0x100b5d928 <+200>: je 0x100b5d955 ; <+245> [inlined] protocol witness for Swift.Strideable.advanced(by: A.Stride) -> A in conformance Swift.Int : Swift.Strideable in Swift
0x100b5d92a <+202>: cmpq %rcx, %rdx
0x100b5d92d <+205>: je 0x100b5d950 ; <+240> [inlined] MyAppTests.MyAppTests.moveZeroesSwapAt(inout Swift.Array<Swift.Int>) -> () + 79 at MyAppTests.swift:128
0x100b5d92f <+207>: movq (%rax), %rdi
0x100b5d932 <+210>: cmpq %rdi, %rdx
0x100b5d935 <+213>: jae 0x100b5d984 ; <+292> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A
0x100b5d937 <+215>: cmpq %rdi, %rcx
0x100b5d93a <+218>: jae 0x100b5d986 ; <+294> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 2
0x100b5d93c <+220>: movq 0x20(%rbx,%rcx,8), %rdi
0x100b5d941 <+225>: movq %rdi, 0x20(%rbx,%rdx,8)
0x100b5d946 <+230>: cmpq (%rax), %rcx
0x100b5d949 <+233>: jge 0x100b5d988 ; <+296> [inlined] generic specialization <Swift.Int> of Swift._ArrayBuffer._checkInoutAndNativeTypeCheckedBounds(_: Swift.Int, wasNativeTypeChecked: Swift.Bool) -> ()
0x100b5d94b <+235>: movq %rsi, 0x20(%rbx,%rcx,8)
0x100b5d950 <+240>: incq %rcx
0x100b5d953 <+243>: jo 0x100b5d982 ; <+290> [inlined] MyAppTests.MyAppTests.moveZeroesSwapAt(inout Swift.Array<Swift.Int>) -> () at MyAppTests.swift:128
0x100b5d955 <+245>: incq %rdx
0x100b5d958 <+248>: cmpq %rdx, %r12
0x100b5d95b <+251>: jne 0x100b5d920 ; <+192> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 15
不过,这几条额外的说明似乎使元组方法在“许多交换”情况下略有优势。
最重要的是,在我的基准测试中,除了最庞大的数据集之外,两者之间的差异可以忽略不计,否则不值得担心。而且性能特征取决于数据集的性质,在一般情况下,没有明显的赢家。而且,坦率地说,如果您的数据集实际上足够大,足以保证可以相互挑选一个,那么您确实应该完全考虑使用另一种算法。
如果您有兴趣,这是我的单元测试,已通过“发布”版本进行测试,并启用了“随机执行顺序”:
import XCTest
class MyAppTests: XCTestCase {
static var originalArray: [Int]!
let iterations = 1_000_000
// fyi, use static to make sure all tests use the same original array
// eliminating any discrepancies between different tests, within the same
// test run, having different data and therefore different number of swaps.
override static func setUp() {
print("building array")
// scenario 1: no swapping needed
//
// Array with 1000 integers, 10% which are zeros and the rest are non-zero
originalArray = (0..<900).map { _ in Int.random(in: 1..<10) } + [Int](repeating: 0, count: 100)
// scenario 2: some swapping needed
//
// if you don't want them randomized, comment the following line
originalArray.shuffle()
// scenario 3: since all zeros are at the start, the maximum amount of swapping
//
// if you want zeros at start, uncomment the following line
//
// originalArray.sort()
}
func moveZeroesTuple(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
(nums[i], nums[j]) = (nums[j], nums[i])
}
j += 1
}
}
func moveZeroesShiftManual(_ nums: inout [Int]) {
var i = 0
let count = nums.count
while i < count, nums[i] != 0 {
i += 1
}
var j = i
while i < count {
if nums[i] != 0 {
nums[j] = nums[i]
j += 1
}
i += 1
}
while j < count {
nums[j] = 0
j += 1
}
}
func moveZeroesSwapManual(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
let temp = nums[i]
nums[i] = nums[j]
nums[j] = temp
}
j += 1
}
}
func moveZeroesSwapAt(_ nums: inout [Int]) {
var j = 0
for i in 0 ..< nums.count where nums[i] != 0 {
nums.swapAt(i, j)
j += 1
}
}
// a quick test to make sure all solutions generate the same result
func testLogic() {
var arrayTuple = MyAppTests.originalArray!
moveZeroesTuple(&arrayTuple)
var arraySwapAt = MyAppTests.originalArray!
moveZeroesSwapAt(&arraySwapAt)
var arrayShiftManual = MyAppTests.originalArray!
moveZeroesShiftManual(&arrayShiftManual)
var arraySwapManual = MyAppTests.originalArray!
moveZeroesSwapManual(&arraySwapManual)
XCTAssertEqual(arrayTuple, arrayShiftManual)
XCTAssertEqual(arrayTuple, arraySwapManual)
XCTAssertEqual(arrayTuple, arraySwapAt)
}
// now the performance tests
func testTuple() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesTuple(&array)
}
}
}
func testSwapAt() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesSwapAt(&array)
}
}
}
func testShiftManual() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesShiftManual(&array)
}
}
}
func testSwapManual() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesSwapManual(&array)
}
}
}
}
通常,在发布版本上进行测试时,我将对结果进行某些处理,以确保编译器不会完全优化某些关键代码部分。我在这里不需要这样做,只是因为我仔细检查了各种方法的程序集,并且知道所有内容都已保存。