Swift:将[String]拆分为具有给定子数组大小的[[String]]的正确方法是什么?

时间:2014-10-16 03:30:15

标签: ios swift algorithm

从大的[String]和给定的子阵列大小开始,将这个数组拆分成更小的数组的最佳方法是什么? (最后一个数组将小于给定的子数组大小。)

具体示例:

  

拆分["1","2","3","4","5","6","7"],最大分割尺寸为2

     

代码会产生[["1","2"],["3","4"],["5","6"],["7"]]

显然我可以手动多做一点,但我觉得像map()或者reduce()这样的东西可以做我想要的非常漂亮。

12 个答案:

答案 0 :(得分:46)

在Swift 3/4中,这将如下所示:

作为Array的扩展:

// Input elements and the maximum number of elements allowed per chunk.
let numbers = ["1","2","3","4","5","6","7"]
let chunkSize = 2
// Visit the first offset of every chunk and slice up to the next chunk start,
// clamped to the end of `numbers` so the final chunk may be shorter.
let chunks = stride(from: 0, to: numbers.count, by: chunkSize).map { lower -> [String] in
    let upper = min(lower + chunkSize, numbers.count)
    return Array(numbers[lower..<upper])
}
// chunks is [["1", "2"], ["3", "4"], ["5", "6"], ["7"]]

或稍微冗长,但更为笼统:

extension Array {
    /// Splits the array into consecutive chunks of at most `chunkSize` elements.
    ///
    /// - Parameter chunkSize: Maximum number of elements in each chunk.
    /// - Returns: The chunks in their original order; the last chunk may be shorter.
    func chunked(by chunkSize: Int) -> [[Element]] {
        let chunkStarts = stride(from: 0, to: count, by: chunkSize)
        return chunkStarts.map { lower -> [Element] in
            // Clamp the upper bound so the final slice never runs past the end.
            let upper = Swift.min(lower + chunkSize, count)
            return Array(self[lower..<upper])
        }
    }
}

这更通用,因为我对集合中索引的类型做了更少的假设。在之前的实现中,我假设它们可以进行比较和添加。

请注意,在Swift 3中,推进索引的功能已从索引本身转移到集合中。

答案 1 :(得分:34)

使用Swift 4.2和Swift 5,根据您的需要,您可以选择以下五种之一来解决您的问题。

1。在Collection扩展方法中使用AnyIterator

AnyIterator是一个很好的候选者,可以迭代符合Collection协议的对象的索引,以便返回此对象的子序列。在Collection协议扩展中,您可以使用以下实现声明chunked(by:)方法:

extension Collection {

    /// Splits the collection into consecutive arrays of at most `distance` elements.
    ///
    /// - Parameter distance: Maximum number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order; the last chunk may be shorter.
    func chunked(by distance: Int) -> [[Element]] {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop

        // `cursor` is the start of the next chunk and is advanced by the iterator closure.
        var cursor = startIndex
        let chunkIterator = AnyIterator<[Element]> {
            guard cursor != self.endIndex else { return nil }
            let lower = cursor
            // Step forward by `distance`, stopping at `endIndex` when fewer elements remain.
            cursor = self.index(lower, offsetBy: distance, limitedBy: self.endIndex) ?? self.endIndex
            return Array(self[lower ..< cursor])
        }

        return Array(chunkIterator)
    }

}

用法:

// Nine single-digit strings grouped two at a time.
let digits = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let pairs = digits.chunked(by: 2)
print(pairs) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

2。在Array扩展方法中使用stride(from:to:by:)函数

Array索引的类型为Int,符合Strideable协议。因此,您可以对它们使用stride(from:to:by:)和advanced(by:)。在Array扩展中,您可以使用以下实现声明chunked(by:)方法:

extension Array {

    /// Splits the array into consecutive chunks of at most `distance` elements.
    ///
    /// - Parameter distance: Maximum number of elements per chunk.
    /// - Returns: The chunks in order; the last chunk may be shorter.
    func chunked(by distance: Int) -> [[Element]] {
        var chunks: [[Element]] = []
        for lower in stride(from: startIndex, to: endIndex, by: distance) {
            // Advance by one chunk, then clamp to `endIndex` for the final, shorter chunk.
            let candidate = lower.advanced(by: distance)
            let upper = Swift.min(candidate, endIndex)
            chunks.append(Array(self[lower ..< upper]))
        }
        return chunks
    }

}

用法:

// Group nine single-digit strings into pairs and show the result.
let source = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let grouped = source.chunked(by: 2)
print(grouped) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

3。在Array扩展方法中使用递归方法

根据Nate Cook的递归代码(recursive code),您可以在Array扩展中声明chunked(by:)方法,其中包含以下实现:

extension Array {

    /// Recursively splits the array into consecutive chunks of at most `distance` elements.
    ///
    /// - Parameter distance: Maximum number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order; the last chunk may be shorter. An empty array
    ///   produces no chunks.
    func chunked(by distance: Int) -> [[Element]] {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop

        // Without this guard the base case below would report one empty chunk (`[[]]`)
        // for an empty array.
        guard !isEmpty else { return [] }

        // Base case: everything that is left fits into a single chunk.
        if count <= distance {
            return [self]
        }

        // Peel off the first chunk and recurse on the remainder.
        let head = Array(prefix(distance))
        let tail = Array(dropFirst(distance))
        return [head] + tail.chunked(by: distance)
    }

}

用法:

// Chunk nine single-digit strings two at a time.
let items = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let chunkedItems = items.chunked(by: 2)
print(chunkedItems) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

4。在Collection扩展方法中使用for循环和批处理

Chris Eidhof和Florian Kugler在Swift Talk #33 - Sequence & Iterator (Collections #2)视频中展示了如何使用简单的for循环把序列元素分批填充,并在每一批填满时将其附加到结果数组中。在Collection扩展中,您可以使用以下实现声明chunked(by:)方法:

extension Collection {

    /// Splits the collection into consecutive arrays of at most `distance` elements,
    /// filling one batch at a time with a plain `for` loop.
    ///
    /// - Parameter distance: Maximum number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order; the last chunk may be shorter.
    func chunked(by distance: Int) -> [[Element]] {
        // A non-positive distance can never equal `batch.count`, so every element
        // would end up in one oversized chunk; reject it up front instead.
        precondition(distance > 0, "distance must be greater than 0")

        var result: [[Element]] = []
        var batch: [Element] = []

        for element in self {
            batch.append(element)

            // Flush the batch as soon as it reaches the requested size.
            if batch.count == distance {
                result.append(batch)
                batch = []
            }
        }

        // Whatever is left over forms the final, shorter chunk.
        if !batch.isEmpty {
            result.append(batch)
        }

        return result
    }

}

用法:

// Batch nine single-digit strings into groups of two.
let values = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let batches = values.chunked(by: 2)
print(batches) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

5。使用符合Sequence和IteratorProtocol协议的自定义struct

如果您不想创建Sequence、Collection或Array的扩展,则可以创建一个符合Sequence和IteratorProtocol协议的自定义struct。此struct应具有以下实现:

struct BatchSequence<T>: Sequence, IteratorProtocol {

    private let array: [T]
    private let distance: Int
    private var index = 0

    init(array: [T], distance: Int) {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop
        self.array = array
        self.distance = distance
    }

    mutating func next() -> [T]? {
        guard index < array.endIndex else { return nil }
        let newIndex = index.advanced(by: distance) > array.endIndex ? array.endIndex : index.advanced(by: distance)
        defer { index = newIndex }
        return Array(array[index ..< newIndex])
    }

}

// Wrap the array in a BatchSequence and collect every batch it yields.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = Array(BatchSequence(array: array, distance: 2))
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

用法见上面的示例代码。

答案 2 :(得分:26)

我不会称之为漂亮,但这是使用map的方法:

// Splits `numbers` into slices of at most `splitSize` elements using `map`.
// NOTE(review): `startIndex.stride(to:by:)` and `advancedBy(_:limit:)` look like
// pre-Swift 3 APIs — confirm the toolchain before reusing this snippet.
let numbers = ["1","2","3","4","5","6","7"]
let splitSize = 2
// Each stride value is the first index of a chunk; the slice runs from there to the
// index advanced by `splitSize`, with `numbers.endIndex` passed as the limit.
let chunks = numbers.startIndex.stride(to: numbers.count, by: splitSize).map {
  numbers[$0 ..< $0.advancedBy(splitSize, limit: numbers.endIndex)]
}

stride(to:by:)方法为您提供每个块的第一个元素的索引,因此您可以使用advancedBy(distance:limit:)将这些索引映射到源数组的切片。

更“函数式”的方法是直接对数组进行递归,如下所示:

/// Recursively splits `s` into arrays of at most `splitSize` elements.
///
/// - Parameters:
///   - s: The array to split.
///   - splitSize: Maximum number of elements per chunk.
/// - Returns: `[s]` when `countElements(s) <= splitSize`; otherwise the first
///   `splitSize` elements followed by the chunks of the remaining elements.
// NOTE(review): `countElements` and the unlabeled recursive call look like early-Swift
// syntax — confirm the toolchain before reusing this snippet.
func chunkArray<T>(s: [T], splitSize: Int) -> [[T]] {
    if countElements(s) <= splitSize {
        // Base case: the whole input is returned as the only chunk.
        return [s]
    } else {
        // Take the first `splitSize` elements, then recurse on the rest.
        return [Array<T>(s[0..<splitSize])] + chunkArray(Array<T>(s[splitSize..<s.count]), splitSize)
    }
}

答案 3 :(得分:8)

我喜欢Nate Cook的回答,不过自那个回答写成以来Swift一直在演进,下面是我把它写成Array扩展的版本:

extension Array {
    /// Splits the array into consecutive arrays of at most `chunkSize` elements.
    ///
    /// - Parameter chunkSize: Maximum number of elements per chunk.
    /// - Returns: One array per stride step, each built from the slice that starts at
    ///   that step and ends at the index advanced by `chunkSize`, with `self.count`
    ///   passed as the limit.
    // NOTE(review): `0.stride(to:by:)` and `advancedBy(_:limit:)` look like pre-Swift 3
    // APIs — confirm the toolchain before reusing this snippet.
    func chunk(chunkSize : Int) -> Array<Array<Element>> {
        return 0.stride(to: self.count, by: chunkSize)
            .map { Array(self[$0..<$0.advancedBy(chunkSize, limit: self.count)]) }
    }
}

注意,对于负数它会返回[],而在另一些输入下(例如步长为0时)会像上面的写法一样导致致命错误。如果想避免这种情况,就必须自己加上guard判断。

/// Seven elements split two at a time leave a single trailing element.
func testChunkByTwo() {
    let expectedOutput = [[1,2], [3,4], [5,6], [7]]
    let output = [1,2,3,4,5,6,7].chunk(2)
    XCTAssertEqual(expectedOutput, output)
}

/// A chunk size of one wraps every element in its own array.
func testByOne() {
    let expectedOutput = [[1],[2],[3],[4],[5],[6],[7]]
    let output = [1,2,3,4,5,6,7].chunk(1)
    XCTAssertEqual(expectedOutput, output)
}

/// A negative chunk size is expected to produce no chunks at all.
func testNegative() {
    let input = [1,2,3,4,5,6,7]
    let output = input.chunk(-2)
    // An empty literal carries no element type of its own, so spell it out to
    // match the `[[Int]]` shape used by the sibling tests.
    let expectedOutput: [[Int]] = []
    XCTAssertEqual(expectedOutput, output)
}

答案 4 :(得分:4)

我认为你不想使用map或者reduce。 Map用于在数组中的每个单独元素上应用函数,而reduce用于展平数组。你想要做的是将数组切成一定大小的子数组。此代码段使用切片。

// Cuts `arr` into sub-arrays of at most `splitSize` elements by slicing in a loop.
// NOTE(review): `Slice<String>` and `println` look like early-Swift names — confirm
// the toolchain before reusing this snippet.
var arr = ["1","2","3","4","5","6","7"]
var splitSize = 2

var newArr = [[String]]()
// `i` is the index of the first element of the next chunk.
var i = 0
while i < arr.count {
    var slice: Slice<String>!
    if i + splitSize >= arr.count {
        // Fewer than (or exactly) `splitSize` elements remain: take everything up to the end.
        slice = arr[i..<arr.count]
    }
    else {
        // A full chunk of `splitSize` elements is available.
        slice = arr[i..<i+splitSize]
    }
    newArr.append(Array(slice))
    // Move past the elements that were just consumed.
    i += slice.count
}
println(newArr)

答案 5 :(得分:3)

把Tyler Cloutier的写法(formulation)表达为Array的扩展会很不错:

extension Array {
    /// Partitions the array into consecutive chunks of at most `chunkSize` elements.
    ///
    /// - Parameter chunkSize: Maximum number of elements per chunk.
    /// - Returns: The chunks in order; the last chunk may be shorter.
    func chunked(by chunkSize: Int) -> [[Element]] {
        return stride(from: 0, to: count, by: chunkSize).map { lower -> [Element] in
            // `Swift.min` clamps the upper bound directly; no temporary two-element
            // array and no force-unwrap are needed.
            let upper = Swift.min(lower + chunkSize, count)
            return Array(self[lower..<upper])
        }
    }
}

这为我们提供了一种将数组分区为块的一般方法。

答案 6 :(得分:3)

Swift 4中的新功能,您可以使用reduce(into:)有效地执行此操作。这是序列的扩展:

extension Sequence {
    /// Groups the elements of the sequence into consecutive arrays of at most `clump` elements.
    ///
    /// - Parameter clump: Maximum number of elements per group. Values of 1 or less
    ///   put every element in its own group.
    /// - Returns: The groups in order; the last group may be shorter.
    func eachSlice(_ clump:Int) -> [[Self.Element]] {
        return reduce(into: []) { memo, cur in
            // Grow the last group in place while it still has room, instead of
            // removing it and rebuilding it by concatenation for every element.
            if let lastIndex = memo.indices.last, memo[lastIndex].count < clump {
                memo[lastIndex].append(cur)
            } else {
                // First element overall, or the last group is full: start a new group.
                memo.append([cur])
            }
        }
    }
}

用法:

let result = [1,2,3,4,5,6,7,8,9].eachSlice(2)
// [[1, 2], [3, 4], [5, 6], [7, 8], [9]]

答案 7 :(得分:2)

我也来凑个热闹,在这里给出另一种基于AnyGenerator的实现方式。

extension Array {
    /// Returns an iterator that yields consecutive chunks of at most `size` elements.
    ///
    /// - Parameter size: Maximum number of elements per chunk. A size of 0 yields an
    ///   iterator that produces nothing.
    /// - Returns: An iterator over the chunks, in order; the last chunk may be shorter.
    func chunks(_ size: Int) -> AnyIterator<[Element]> {
        // Striding by zero is not possible, so hand back an iterator that is already exhausted.
        guard size != 0 else {
            return AnyIterator { nil }
        }

        var chunkStarts = stride(from: startIndex, to: endIndex, by: size).makeIterator()

        return AnyIterator {
            guard let lower = chunkStarts.next() else {
                return nil
            }

            // Clamp the upper bound in a single step rather than walking an index
            // backwards from `lower + size` until it is in range.
            let upper = self.index(lower, offsetBy: size, limitedBy: self.endIndex) ?? self.endIndex
            return Array(self[lower..<upper])
        }
    }
}

我更喜欢这种方法,因为它完全依赖于生成器,这些生成器在处理大型数组时可能具有不可忽略的正面内存影响。

对于您的具体示例,以下是它的工作原理:

// Drain the chunk iterator into an array so the whole result can be inspected.
let input = ["1","2","3","4","5","6","7"]
let chunks = Array(input.chunks(2))

结果:

[["1", "2"], ["3", "4"], ["5", "6"], ["7"]]

答案 8 :(得分:2)

在Swift 4或更高版本中,您还可以扩展Collection并返回由其SubSequence组成的集合,以便也能用于StringProtocol类型(String或Substring)。这样,它将返回子字符串的集合,而不是一堆字符的集合:

Xcode 10.1•Swift 4.2.1或更高版本

extension Collection {
    /// Splits the collection into consecutive subsequences of at most `maxLength` elements.
    ///
    /// - Parameter maxLength: Maximum number of elements per subsequence; must be greater than zero.
    /// - Returns: The subsequences in order; the last one may be shorter.
    func subSequences(limitedTo maxLength: Int) -> [SubSequence] {
        precondition(maxLength > 0, "groups must be greater than zero")
        var pieces: [SubSequence] = []
        var lower = startIndex
        while lower != endIndex {
            // Step forward by `maxLength`, stopping at `endIndex` when fewer elements remain.
            let upper = index(lower, offsetBy: maxLength, limitedBy: endIndex) ?? endIndex
            pieces.append(self[lower..<upper])
            lower = upper
        }
        return pieces
    }
}

用法

let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
// Five ArraySlice values: ["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]
let slices = array.subSequences(limitedTo: 2)
// Print every slice on its own line.
slices.forEach { print($0) }
// Copy each ArraySlice<Element> into a standalone Array<Element>.
let arrays = slices.map { Array($0) }  // [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]


extension Collection {
    /// Consecutive subsequences of at most one element each.
    var singles: [SubSequence] { subSequences(limitedTo: 1) }
    /// Consecutive subsequences of at most two elements each.
    var pairs: [SubSequence] { subSequences(limitedTo: 2) }
    /// Consecutive subsequences of at most three elements each.
    var triples: [SubSequence] { subSequences(limitedTo: 3) }
    /// Consecutive subsequences of at most four elements each.
    var quads: [SubSequence] { subSequences(limitedTo: 4) }
}

字符数组或ArraySlice

// Playground-style demo: each expression's value is shown in the trailing comment.
let chars = ["a","b","c","d","e","f","g","h","i"]
chars.singles  // [["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"]]
chars.pairs    // [["a", "b"], ["c", "d"], ["e", "f"], ["g", "h"], ["i"]]
chars.triples  // [["a", "b", "c"], ["d", "e", "f"], ["g", "h", "i"]]
chars.quads    // [["a", "b", "c", "d"], ["e", "f", "g", "h"], ["i"]]
// Same grouping applied after dropping the first two elements.
chars.dropFirst(2).quads  // [["c", "d", "e", "f"], ["g", "h", "i"]]

StringProtocol元素(字符串和子字符串)

// Playground-style demo on a String: each expression's value is shown in the trailing comment.
let str = "abcdefghi"
str.singles  // ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
str.pairs    // ["ab", "cd", "ef", "gh", "i"]
str.triples  // ["abc", "def", "ghi"]
str.quads    // ["abcd", "efgh", "i"]
// Same grouping applied after dropping the first two characters.
str.dropFirst(2).quads    // ["cdef", "ghi"]

答案 9 :(得分:0)

你知道吗?任何使用[a...b]这种Swift风格切片的解决方案,都比下面这种常规写法慢约10倍:

// Builds `mat` one row at a time by copying `cols` consecutive elements of `stream`.
for y in 0..<rows {
    var row = [Double]()
    for x in 0..<cols {
        // Element (y, x) sits at offset `y * cols + x` in the flat `stream`.
        row.append(stream[y * cols + x])
    }
    mat.append(row)
}

试试看,这将是我的测试原始代码:

// Benchmark setup: a flat buffer of `count` doubles, read as `rows` rows of `cols` columns.
let count = 1000000
let cols = 1000
let rows = count / cols
var stream = [Double].init(repeating: 0.5, count: count)

// Regular
var mat = [[Double]]()

// Start time for the nested-loop version.
let t1 = Date()

for y in 0..<rows {
    var row = [Double]()
    for x in 0..<cols {
        // Element (y, x) sits at offset `y * cols + x` in the flat `stream`.
        row.append(stream[y * cols + x])
    }
    mat.append(row)
}

// Elapsed seconds for the nested-loop version.
print("regular: \(Date().timeIntervalSince(t1))")


//Swift
// Start time for the stride-and-slice version.
let t2 = Date()

// One slice per stride step, each ending at most `cols` elements later (limited by `endIndex`).
var mat2: [[Double]] = stride(from: 0, to: stream.count, by: cols).map {
    let end = stream.endIndex
    let chunkEnd = stream.index($0, offsetBy: cols, limitedBy: end) ?? end
    return Array(stream[$0..<chunkEnd])
}

// Elapsed seconds for the stride-and-slice version.
print("swift: \(Date().timeIntervalSince(t2))")

输出:

常规:0.0449600219726562

swift:0.49255496263504

答案 10 :(得分:0)

Swift 5.1-各种集合的通用解决方案:

extension Collection where Index == Int {
    /// Splits the collection into consecutive arrays of at most `chunkSize` elements.
    ///
    /// - Parameter chunkSize: Maximum number of elements per chunk.
    /// - Returns: The chunks in order; the last chunk may be shorter.
    func chunked(by chunkSize: Int) -> [[Element]] {
        stride(from: startIndex, to: endIndex, by: chunkSize).map { lower in
            // Clamp against `endIndex`, not `count`: the two differ whenever
            // `startIndex` is not 0 (for example on an `ArraySlice`).
            Array(self[lower..<Swift.min(lower + chunkSize, endIndex)])
        }
    }
}

答案 11 :(得分:0)

public extension Optional {
  /// Creates an `Optional` that is `nil` when a predicate holds for the given value,
  /// and wraps the value otherwise.
  /// - Parameters:
  ///   - wrapped: The non-optional value to wrap.
  ///   - getIsNil: Predicate evaluated on `wrapped`; returning `true` produces `nil`.
  /// - Throws: Rethrows any error thrown by `getIsNil`.
  init(_ wrapped: Wrapped, nilWhen getIsNil: (Wrapped) throws -> Bool) rethrows {
    if try getIsNil(wrapped) {
      self = .none
    } else {
      self = .some(wrapped)
    }
  }
}
public extension Sequence {
  /// Lazily groups the elements of a `Sequence` into arrays of a bounded size.
  ///
  /// - Parameter maxArrayCount: The maximum number of elements in a chunk.
  /// - Returns: A sequence of `Array`s holding `maxArrayCount` elements each,
  ///   except for the last chunk, which may be smaller.
  subscript(maxArrayCount maxCount: Int) -> AnySequence<[Element]> {
    let chunks = sequence(state: makeIterator()) { iterator -> [Element]? in
      // Pull up to `maxCount` elements; an empty pull means the source is exhausted.
      let chunk = (0..<maxCount).compactMap { _ in iterator.next() }
      return Optional(chunk, nilWhen: \.isEmpty)
    }
    return AnySequence(chunks)
  }
}
// Chunks the strings "1" through "7" two at a time:
// [ ["1", "2"], ["3", "4"], ["5", "6"], ["7"] ]
(1...7).map(String.init)[maxArrayCount: 2]
public extension Collection {
  /// Lazily splits a `Collection` into consecutive subsequences of a bounded size.
  ///
  /// - Parameter maxSubSequenceCount: The maximum number of elements in a chunk.
  /// - Returns: A sequence of `SubSequence`s holding `maxSubSequenceCount` elements each,
  ///   except for the last chunk, which may be smaller.
  subscript(maxSubSequenceCount maxCount: Int) -> AnySequence<SubSequence> {
    AnySequence(
      sequence(state: startIndex) { lower -> SubSequence? in
        // Stop once the running lower bound has reached the end of the collection.
        if lower >= self.endIndex { return nil }

        // Step forward by `maxCount`, stopping at `endIndex` when fewer elements remain.
        let upper =
          self.index(lower, offsetBy: maxCount, limitedBy: self.endIndex)
          ?? self.endIndex
        let chunk = self[lower..<upper]
        lower = upper
        return chunk
      }
    )
  }
}
// Joins "1" through "7" into one string, then chunks it two characters at a time:
// ["12", "34", "56", "7"]
(1...7).map(String.init).joined()[maxSubSequenceCount: 2]