如何计算两个列表的增量(插入/删除/移动的索引)?

时间:2015-06-04 20:38:09

标签: algorithm swift

假设我有两个具有唯一ID的对象列表和一个确定其顺序的属性,我如何有效地获取delta索引(插入了哪些索引,哪些已删除,哪些已被移动)?

输入示例:

// Sample input: the list as it was before the change. Each element is an
// (id, timestamp) tuple and the rows are listed newest timestamp first.
let before: [(id: String, timestamp: String)] = [
    ("A", "2015-06-04T12:38:09Z"),
    ("B", "2015-06-04T10:12:45Z"),
    ("C", "2015-06-04T08:39:55Z"),
    ("D", "2015-06-03T23:58:32Z"),
    ("E", "2015-06-01T00:05:51Z"),
]

// Sample input: the list after the change. "F" is new, "C" carries a newer
// timestamp than in `before`, and "D" / "E" are no longer present.
let after: [(id: String, timestamp: String)] = [
    ("F", "2015-06-04T16:13:01Z"),
    ("C", "2015-06-04T15:10:29Z"),
    ("A", "2015-06-04T12:38:09Z"),
    ("B", "2015-06-04T10:12:45Z"),
]

// `deltaFn` is the function being asked for; it is not defined in this snippet.
let delta = deltaFn(before, after)

以上是可视化的:

BEFORE                                   AFTER
+-------+----+----------------------+    +-------+----+----------------------+
| index | id | timestamp            |    | index | id | timestamp            |
+-------+----+----------------------+    +-------+----+----------------------+
|     0 |  A | 2015-06-04T12:38:09Z |    |     0 |  F | 2015-06-04T16:13:01Z |
|     1 |  B | 2015-06-04T10:12:45Z |    |     1 |  C | 2015-06-04T15:10:29Z |
|     2 |  C | 2015-06-04T08:39:55Z |    |     2 |  A | 2015-06-04T12:38:09Z |
|     3 |  D | 2015-06-03T23:58:32Z |    |     3 |  B | 2015-06-04T10:12:45Z |
|     4 |  E | 2015-06-01T00:05:51Z |    |     - |    |                      |
+-------+----+----------------------+    +-------+----+----------------------+

预期结果(delta):

Inserted indexes:  [0]
Deleted indexes:   [3, 4]
Moved indexes:     [(from: 0, to: 2), (from: 1, to: 3), (from: 2, to: 1)]

4 个答案:

答案 0 :(得分:2)

一种可能的解决方案(类似于 @amit 的答案,但只使用一个字典):

// Single-dictionary delta. Every id maps to (from: oldIndex, to: newIndex),
// with -1 standing in for a missing side:
//    from == -1  -> the id occurs only in `after`  (inserted)
//    to   == -1  -> the id occurs only in `before` (deleted)
var map : [ String : (from: Int, to: Int)] = [:]

// Pass 1: remember where each id sat in `before`; its new index is unknown so far.
for (oldIndex, item) in enumerate(before) {
    map[item.id] = (from: oldIndex, to: -1)
}

// Pass 2: fill in the new index for known ids, or register a brand-new id.
for (newIndex, item) in enumerate(after) {
    if let known = map[item.id] {
        map[item.id] = (from: known.from, to: newIndex)
    } else {
        map[item.id] = (from: -1, to: newIndex)
    }
}

var insertedIndices : [Int] = []
var deletedIndices : [Int] = []
var movedIndices : [(from: Int, to: Int)] = []

// Sort every entry into one of the three buckets. Entries with both indexes
// set land in `movedIndices`, including those whose index did not change.
for entry in map.values {
    if entry.to == -1 {
        deletedIndices.append(entry.from)
    } else if entry.from == -1 {
        insertedIndices.append(entry.to)
    } else {
        movedIndices.append(entry)
    }
}

// Expected output for the sample data; the arrays are filled in dictionary
// iteration order.
println(insertedIndices) // [0]
println(deletedIndices)  // [3, 4]
println(movedIndices)    // [(1, 3), (0, 2), (2, 1)]

或者,使用 optionals 来表示缺少旧索引或新索引,如@doisk所示:

// Same idea as the sentinel version, but a missing index is modelled as nil:
//    from == nil  -> the id occurs only in `after`  (inserted)
//    to   == nil  -> the id occurs only in `before` (deleted)
var map : [ String : (from: Int?, to: Int?)] = [:]

// Pass 1: every id of `before` starts out with a known old index and no new index.
for (oldIndex, item) in enumerate(before) {
    map[item.id] = (from: oldIndex, to: nil)
}

// Pass 2: keep the old index if the id is already known (nil otherwise) and
// record the new index.
for (newIndex, item) in enumerate(after) {
    let previous = map[item.id]?.from
    map[item.id] = (from: previous, to: newIndex)
}

// Result buckets.
var insertedIndices : [Int] = []
var deletedIndices : [Int] = []
var movedIndices : [(from: Int, to: Int)] = []

for pair in map.values {
    if let oldIndex = pair.from {
        if let newIndex = pair.to {
            // Present in both lists.
            movedIndices.append((from: oldIndex, to: newIndex))
        } else {
            // Present only in `before`.
            deletedIndices.append(oldIndex)
        }
    } else if let newIndex = pair.to {
        // Present only in `after`.
        insertedIndices.append(newIndex)
    } else {
        // Neither loop above ever stores (nil, nil).
        fatalError("Oops") // This should not happen!
    }
}

答案 1 :(得分:1)

我的解决方案不使用 map 函数。计算复杂度为 O(n * m),其中 n 是 before 中的元素个数,m 是 after 中的元素个数。

我担心这不是最好的解决方案......但是这里是:)

import Foundation

// List element carrying an id and a timestamp.
// Equality (see `==` below) looks at the id only, so two elements with the
// same id but different timestamps compare equal.
class Elm {
    let id : String
    let timestamp : String

    // Builds an element from an (id, timestamp) tuple.
    init(tuple : (id:String, timestamp:String)) {
        let (identifier, stamp) = tuple
        id = identifier
        timestamp = stamp
    }
}

// Two elements are the same element when their ids match.
func ==(left: Elm, right: Elm) -> Bool {
    let sameId = (left.id == right.id)
    return sameId
}

// The conformance is declared after `==` so the operator is already visible.
extension Elm : Equatable {}

// Sample data: the list before the change ...
let before: [Elm] = [
    ("A", "2015-06-04T12:38:09Z"),
    ("B", "2015-06-04T10:12:45Z"),
    ("C", "2015-06-04T08:39:55Z"),
    ("D", "2015-06-03T23:58:32Z"),
    ("E", "2015-06-01T00:05:51Z")
].map { Elm(tuple: $0) }

// ... and the list after the change.
let after: [Elm] = [
    ("F", "2015-06-04T16:13:01Z"),
    ("C", "2015-06-04T15:10:29Z"),
    ("A", "2015-06-04T12:38:09Z"),
    ("B", "2015-06-04T10:12:45Z")
].map { Elm(tuple: $0) }

/// Returns the indexes (into `after`) of the elements that do not occur in `before`.
///
/// Each element of `after` is looked up in `before` with a linear scan,
/// hence O(m * n).
func inserted(before:[Elm], after:[Elm]) -> [Int] {
    var newIndexes = [Int]()
    for position in 0..<after.count {
        // Already known from `before`: not an insertion.
        if contains(before, after[position]) {
            continue
        }
        newIndexes.append(position)
    }
    return newIndexes
}

/// Returns the indexes (into `before`) of the elements that no longer occur in `after`.
///
/// Each element of `before` is looked up in `after` with a linear scan,
/// hence O(n * m).
func deleted(before:[Elm], after:[Elm]) -> [Int] {
    // Keep exactly those positions of `before` whose element is gone from `after`.
    return filter(0..<before.count) { position in
        !contains(after, before[position])
    }
}

/// Returns a dictionary mapping old index -> new index for every element that
/// occurs in both lists but is not found at its old index in `after`.
///
/// Each element of `before` is looked up in `after` with a linear scan,
/// hence O(n * m).
///
/// - parameter before: the list before the change
/// - parameter after:  the list after the change
/// - returns: `[oldIndex: newIndex]` for all moved elements
func moved(before:[Elm], after:[Elm]) -> [Int:Int] {
    var moved = [Int:Int]()
    for (index, elm) in enumerate(before) {
        // Elements that are gone from `after` are deletions, not moves.
        if let newIndex = find(after, elm) {
            // `index` comes from `before` and may lie past the end of `after`
            // when `after` is shorter, so bounds-check before subscripting.
            // An element that survives at such an index has necessarily moved.
            let stayedInPlace = index < after.count && after[index] == elm
            if !stayedInPlace {
                moved[index] = newIndex
            }
        }
    }
    return moved
}

// Run the three helpers on the sample data. The results are not stored;
// presumably they are meant to be inspected in a playground.
inserted(before, after)
deleted(before, after)
moved(before, after)

答案 2 :(得分:0)

这是我设法做出来的方案:

// id -> (index in `before`, index in `after`); nil means "not seen on that side".
var map: [String : (bef: Int?, aft: Int?)] = [:]

// Walk both lists in lock-step; the shorter one is padded with nil.
for (position, pair) in zipWithPadding(before, after).enumerate() {
  let (old, new) = pair

  // Same id at the same position on both sides: nothing to record.
  if old?.id == new?.id { continue }

  // Record the old position, keeping any new position already stored for this id.
  if let old = old {
    map[old.id] = (position, map[old.id]?.aft)
  }
  // Record the new position, keeping any old position already stored for this id.
  if let new = new {
    map[new.id] = (map[new.id]?.bef, position)
  }
}

// Report what happened to each id that was recorded.
for (id, indexes) in map {
  if indexes.aft == nil {
    // No new position: the id was deleted.
    print("\(id): del at \(indexes.bef!)")
  } else if indexes.bef == nil {
    // No old position: the id was inserted.
    print("\(id): ins at \(indexes.aft!)")
  } else {
    print("\(id): mov from \(indexes.bef!) to \(indexes.aft!)")
  }
}

//D: del at 3
//E: del at 4
//F: ins at 0
//B: mov from 1 to 3
//A: mov from 0 to 2
//C: mov from 2 to 1

此方法与其他基于字典的答案几乎相同,只是少用了一个循环,并且会跳过两个数组中位置相同的相同元素。这里的 map 是一个字典,键是字符串(数组元素的 id),值是一个元组。元组由两个 Int 组成,分别是给定 id 在第一个数组中的索引和在第二个数组中的索引。这两个 Int 都是可选的:我们正是借此判断每个 id 发生了什么。如果第一个是 nil 而第二个不是,则该 id 是被插入的;如果第二个是 nil,则它被删除了;如果两个都不是 nil,则该 id 从第一个索引移动到了第二个索引。

填充该字典的方法是遍历 zipWithPadding 函数的输出,该函数如下:

/// Zips two sequences of possibly different length.
///
/// The returned generator yields `(E0?, E1?)` tuples. Once one sequence is
/// exhausted its slot is `nil` in every later tuple; the generator finishes
/// when both sequences are exhausted.
func zipWithPadding <
  S0: SequenceType, S1: SequenceType, E0, E1 where
  S0.Generator.Element == E0, S1.Generator.Element == E1
  > (s0: S0, _ s1: S1) -> AnyGenerator<(E0?, E1?)> {

    // Held as optionals so a finished generator can be dropped and is never
    // asked for another element after it has returned nil.
    var left: S0.Generator? = s0.generate()
    var right: S1.Generator? = s1.generate()

    return anyGenerator {
      let e0: E0? = left?.next()
      if e0 == nil { left = nil }

      let e1: E1? = right?.next()
      if e1 == nil { right = nil }

      // Both sides exhausted: the zipped sequence ends here.
      if e0 == nil && e1 == nil { return nil }
      return (e0, e1)
    }
}

这个函数我是从这里(here)得到的。不能使用标准库 zip 的原因是:只要其中任何一个底层序列耗尽,它就会终止,而这里两个序列的长度不同。这个 zip 函数返回一个生成器,依次产生由两个序列参数中对应元素组成的元组。如果其中一个序列先于另一个结束,那么之后返回的元组中该序列对应的值为 nil。这是一个例子:

// Example: the shorter second array is padded with nil in the last tuple.
Array(zipWithPadding([1, 2, 3], [1, 2]))
//[({Some 1}, {Some 1}), ({Some 2}, {Some 2}), ({Some 3}, nil)]

因为无法保证生成器在返回 nil 之后还会继续返回 nil(生成器返回 nil 表示它已结束),所以不能在它结束后继续调用同一个生成器来获取元组的值。这就是为什么一旦某个生成器返回 nil,就把该生成器本身设置为 nil:这样就不会再调用它了。

但是,无论如何,数组生成器似乎在最后一个值之后返回nil。所以,如果你不介意未定义的行为:

/// Shortcut variant of `zipWithPadding`: zips two sequences of possibly
/// different length, padding the shorter one with `nil`.
///
/// WARNING: this version keeps calling `next()` on a generator that has
/// already returned `nil`. That is outside what `GeneratorType` guarantees;
/// it only works for generators that keep returning `nil` once exhausted
/// (array generators appear to). Use the variant that drops finished
/// generators when that cannot be assumed.
///
/// The second parameter is unlabeled (`_ s1`) so this variant can be called
/// as `zipWithPadding(a, b)`, exactly like the other variants.
func zipWithPadding <
  S0: SequenceType, S1: SequenceType, E0, E1 where
  S0.Generator.Element == E0, S1.Generator.Element == E1
  > (s0: S0, _ s1: S1) -> AnyGenerator<(E0?, E1?)> {

    var (g0, g1) = (s0.generate(), s1.generate())

    return anyGenerator {
      // Advance both sides on every step, even if one is already exhausted.
      let (e0, e1) = (g0.next(), g1.next())
      // Finish only when neither side produced an element.
      return e0 != nil || e1 != nil ? (e0, e1) : nil
    }
}

一旦有了这个生成器可供遍历,其余的思路就很简单了。把 before 和 after 中的 id 放入字典;如果某个 id 尚未在字典中,则元组中对应的另一个索引被设为 nil。(如果 $0.id 不在字典中,则 map[$0.id]?.aft 将返回 nil。)

就效率而言,我认为这种方法在某些情况下可能有优势。它只使用一个循环而不是两个,看起来更好,但自定义的 zipWithPadding 函数增加了很多开销,以致单个循环实际上比两个顺序循环效率更低。同样,只使用一次 enumerate() 看似高效,但其开销同样得不偿失。(值得注意的是,如果两个数组的长度相同,那么标准库的 zip 会是一个非常快的选择。)

这个方法允许你跳过两个数组中相同的元素,而用两个 for 循环则做不到这一点。在一些快速测试中,当数组中有超过四分之一的元素位置相同时,这个方法似乎更快。这也能加快之后遍历字典的循环:字典只包含有差异的元素,因此更短,而且不需要再检查是否发生了变化。

更新:

我试图弄清楚如何去掉一部分开销,尤其是与生成器有关的开销。我写了一个自定义结构体:

/// Generator that advances two underlying generators in lock-step and yields
/// `(E0?, E1?)` tuples. A side that has run out contributes `nil`; iteration
/// ends once both sides have run out.
struct PaddedZipGenerator<G0: GeneratorType, G1: GeneratorType> : GeneratorType {

  typealias E0 = G0.Element
  typealias E1 = G1.Element

  typealias Element = (E0?, E1?)

  // The wrapped generators. Each is reset to nil after it finishes so it is
  // never asked for another element afterwards.
  private var (g0, g1): (G0?, G1?)

  /// Returns the next padded pair, or nil when both generators are exhausted.
  mutating func next() -> PaddedZipGenerator.Element? {
    let first: E0? = g0?.next()
    if first == nil { g0 = nil }

    let second: E1? = g1?.next()
    if second == nil { g1 = nil }

    guard first != nil || second != nil else { return nil }
    return (first, second)
  }
}

/// Sequence wrapper around two sequences; iterating it produces the padded
/// pairs delivered by `PaddedZipGenerator`.
struct PaddedZip<S0: SequenceType, S1: SequenceType> : SequenceType {

  typealias Generator = PaddedZipGenerator<S0.Generator, S1.Generator>

  // The two wrapped sequences.
  private let s0: S0
  private let s1: S1

  /// Starts a fresh iteration over both wrapped sequences.
  func generate() -> PaddedZip.Generator {
    let first = s0.generate()
    let second = s1.generate()
    return PaddedZipGenerator(g0: first, g1: second)
  }
}

/// Zips two sequences of possibly different length into a `PaddedZip`.
/// Convenience wrapper so callers do not have to spell out the generic type.
func zipWithPadding<S0: SequenceType, S1: SequenceType>(s0: S0, _ s1: S1) -> PaddedZip<S0, S1> {
  let padded = PaddedZip(s0: s0, s1: s1)
  return padded
}

它似乎有效!通过一些基本测试,似乎这个zipWithPadding函数运行得非常快。它似乎比两个for循环工作得更快,即使两个列表都不包含相同的元素。

答案 3 :(得分:0)

这是我的做法:

/// Computes the delta between two lists of (id, timestamp) tuples.
///
/// - parameter before: the list before the change
/// - parameter after:  the list after the change
/// - returns: a tuple of
///     0: indexes in `after` whose id does not occur in `before` (inserted),
///     1: indexes in `before` whose id does not occur in `after` (deleted),
///     2: one "(from: X, to: Y)" string per id present in both lists at a
///        different index (moved), in `after` order.
///
/// Ids are looked up through a dictionary and a set instead of repeated
/// linear `find` / `contains` scans, so the work is linear in the list sizes.
func deltaFn(before: [(id: String, timestamp: String)], after: [(id: String, timestamp: String)] ) -> ([Int], [Int], [String]) {

    // id -> index in `before`. Only the first occurrence is stored, which is
    // the index a linear `find` would have returned.
    var beforeIndexById = [String: Int]()
    for (index, elem) in enumerate(before) {
        if beforeIndexById[elem.id] == nil {
            beforeIndexById[elem.id] = index
        }
    }

    // Get the inserted and moved indexes, collecting the ids of `after` on the way...
    var afterIds = Set<String>()
    var inserted = [Int]()
    var moved = [String]()
    for (index, elem) in enumerate(after) {
        afterIds.insert(elem.id)
        if let beforeIndex = beforeIndexById[elem.id] {
            // Same index in both lists: unchanged, nothing to report.
            if beforeIndex != index {
                moved.append("(from: \(beforeIndex), to: \(index))")
            }
        } else {
            inserted.append(index)
        }
    }

    // Get the deleted indexes...
    var deleted = [Int]()
    for (index, elem) in enumerate(before) {
        if !afterIds.contains(elem.id) {
            deleted.append(index)
        }
    }

    // Return them all as a tuple...
    return (inserted, deleted, moved)
}

// Run deltaFn on the sample data and unpack the three result lists.
let (inserted, deleted, moved) = deltaFn(before, after)

// The trailing comments show the output for the sample data.
println("Inserted: \(inserted)")  // Inserted: [0]
println("Deleted: \(deleted)")    // Deleted: [3, 4]
println("Moved: \(moved)")        // Moved: [(from: 2, to: 1), (from: 0, to: 2), (from: 1, to: 3)]

它按预期工作,而且读起来也比较轻松。

请注意,如果您使用的是Swift 2.0,则对reduce的调用语法会有所不同。例如,

reduce(0..<afterIds.count, (inserted: [Int](), moved: [String]()))

...变为

(0..<afterIds.count).reduce((inserted: [Int](), moved: [String]()))