在两个数组之间进行更改的算法

时间:2010-08-13 12:17:18

标签: javascript arrays performance algorithm

我需要创建一个算法,它能(高效地)接收一个旧数组和一个新数组,并返回两者之间的变化(添加了哪些项目,删除了哪些项目)。它恰好需要用JavaScript实现(在浏览器中运行),但算法本身比语言更重要。

这就是我想出的:http://jsbin.com/osewu3/13。有人能看出其中的问题,或者提出改进建议吗?

谢谢!

代码清单:

/**
 * Reports which items were added to and removed from a list.
 *
 * Both lists are sorted (as copies, so the caller's arrays are left untouched)
 * and then walked in parallel, which makes the whole operation O(n log n).
 * Duplicates are matched one-to-one: diff([1, 1], [1]) reports one removed 1.
 *
 * @param {Array} o the old list; null/undefined is treated as empty
 * @param {Array} n the new list; null/undefined is treated as empty
 * @returns {{added: Array, removed: Array}} items only in `n` (added) and
 *          items only in `o` (removed), each in ascending order
 */
function diff(o, n) {
  // The default Array#sort compares items as strings ([1, 10, 2]), while the
  // merge below compares with < and >. Sorting with the same operators keeps
  // the two orderings consistent, which the merge relies on.
  function byValue(x, y) {
    if (x < y) return -1;
    if (x > y) return 1;
    return 0;
  }

  // deal with missing lists, and sort copies instead of the caller's arrays
  var oldItems = (o == undefined) ? [] : o.slice().sort(byValue);
  var newItems = (n == undefined) ? [] : n.slice().sort(byValue);

  var op = 0, np = 0;
  var a = [], r = [];

  // walk both sorted lists; the smaller head cannot exist in the other list
  while (op < oldItems.length && np < newItems.length) {
    if (oldItems[op] < newItems[np]) {
      r.push(oldItems[op]);
      op++;
    }
    else if (oldItems[op] > newItems[np]) {
      a.push(newItems[np]);
      np++;
    }
    else {
      // same item in both lists: unchanged
      op++; np++;
    }
  }

  // whatever is left in either list has no counterpart in the other
  return {
    added: a.concat(newItems.slice(np)),
    removed: r.concat(oldItems.slice(op))
  };
}

(我还把它作为另一个SO问题的潜在解决方案发布在这里:JavaScript array difference)

8 个答案:

答案 0 :(得分:3)

没有undefined常数。您应该检查变量的类型:

// Default a missing argument to an empty array. Unlike `o == undefined`,
// this check does not match null.
if (typeof o === 'undefined') o = [];

编辑:

正如Tim Down所示,该属性实际上是在标准中定义的,但由于标准没有将其定义为常量,因此它不可靠且不应使用。

答案 1 :(得分:3)

我在两种可能的实现之间创建了速度测试。

源代码:

/**
 * Sort-and-merge diff: reports which items were added to and removed from a list.
 *
 * Works on sorted copies of both lists, so the arrays passed in are not
 * reordered. Duplicates are matched one-to-one.
 *
 * @param {Array} o the old list; null/undefined is treated as empty
 * @param {Array} n the new list; null/undefined is treated as empty
 * @returns {{added: Array, removed: Array}} items only in `n` and items only
 *          in `o`, each in ascending order
 */
function diff1 (o, n) {
  // Sort with the same < / > operators the merge uses. The default sort
  // compares string forms, which puts 10 before 2 and breaks the merge for
  // numeric input.
  function ascending (x, y) {
    return x < y ? -1 : (x > y ? 1 : 0);
  }

  // deal with missing lists; copy before sorting so callers keep their order
  var before = (o == undefined) ? [] : o.slice().sort(ascending);
  var after = (n == undefined) ? [] : n.slice().sort(ascending);

  var op = 0; var np = 0;
  var a = []; var r = [];

  // compare the heads of both sorted lists and record the one-sided items
  while (op < before.length && np < after.length) {
      if (before[op] < after[np]) {
          // only in the old list
          r.push(before[op]);
          op++;
      }
      else if (before[op] > after[np]) {
          // only in the new list
          a.push(after[np]);
          np++;
      }
      else {
          // present in both
          op++; np++;
      }
  }

  // add remaining items: the other list is exhausted, so none can match
  if (np < after.length)
    a = a.concat(after.slice(np));
  if (op < before.length)
    r = r.concat(before.slice(op));

  return {added: a, removed: r};
}

/**
 * Hash-based diff: reports which items were added to and removed from a list.
 *
 * Items are indexed by their string form, so 1 and "1" count as the same item
 * and duplicates collapse into one entry. The original values (not their
 * string keys) are returned; when several items share a key the first one is
 * kept. Results follow property enumeration order, not sorted order.
 *
 * @param {Array} o the old list; null/undefined is treated as empty
 * @param {Array} n the new list; null/undefined is treated as empty
 * @returns {{added: Array, removed: Array}} items only in `n` and items only in `o`
 */
function diff2 (o, n) {
    // Prototype-less objects: with a plain {} the `in` test below would also
    // match inherited names such as "toString" or "constructor", and a
    // "__proto__" item could not be stored at all.
    var oldItems = Object.create(null), newItems = Object.create(null);
    var source, i, key;

    // convert array items to object members, remembering the original value
    source = (o == undefined) ? [] : o;
    for (i = 0; i < source.length; i++) {
        if (!(source[i] in oldItems)) {
            oldItems[source[i]] = source[i];
        }
    }
    source = (n == undefined) ? [] : n;
    for (i = 0; i < source.length; i++) {
        if (!(source[i] in newItems)) {
            newItems[source[i]] = source[i];
        }
    }

    var a = []; var r = [];

    for (key in oldItems) {
        if (!(key in newItems)) {
            r.push (oldItems[key]);
        }
    }
    for (key in newItems) {
        if (!(key in oldItems)) {
            a.push (newItems[key]);
        }
    }
    return {added: a, removed: r};
}

// Benchmark input: o holds every multiple of 2 and n every multiple of 3
// below 300000, so the two lists overlap on the multiples of 6.
var o = [], n = [];
for (var i = 0; i < 300000; i += 2) {
    o.push (i);
}
for (i = 0; i < 300000; i += 3) {
    n.push (i);
}

// Time one run of each implementation on the same input.
var start = new Date ();
diff1 (o, n);
var end1 = new Date ();
diff2 (o, n);
var end2 = new Date ();

// Show "<diff1 ms>, <diff2 ms>".
alert ([end1 - start, end2 - end1].join (", "));

diff2的缺点是返回的数组(添加,删除)没有排序。

速度测试:

IE7:diff1:2578ms, diff2:1906ms

IE8:diff1:1953ms, diff2:1152ms

Firefox: diff1:254ms ,diff2:527ms

Opera: diff1:143ms ,diff2:253ms

Safari: diff1:466ms ,diff2:657ms

Chrome:diff1:734ms, diff2:581ms

结论:在Firefox,Opera和Safari中diff1更快,IE和Chrome中的diff2更快。

答案 2 :(得分:1)

作为你自己想出来的算法,它看起来相当不错!恭喜!
请记住,当您的算法显示两个数组内容的更改(项目删除等)时,它不会显示内容顺序的更改(或删除的项目稍后再次添加)。

例如,你可以移除数组a的第1项,之后再把它添加回去。从技术上讲,这使数组a与数组b不再相同,但你的算法不会注意到这一点。

array a: {1, 2, 3, 4, 5, 6}

array b: {1, 2, 3, 4, 5, 6}

array a: {2, 3, 4, 5, 6}    //after a.shift();
array a: {2, 3, 4, 5, 6, 1} //after a.push(1);

=> a != b //your algorithm would return "a == b" though

你的算法可能足以满足你的特殊需求,但是对于一个非常严格的数组diff算法,我会尝试移植一个字符串diff算法。 基本上改变算法,而不是比较字符串中的字符/运行,它比较数组中的项目。

Javascript string diff algorithm(JS Code)

答案 3 :(得分:0)

以下页面有一个函数可以从另一个数组中删除一个数组,并可用于为您提供2个值。 Remove items from a JavaScript Array with RemoveArrayItems()

// NOTE(review): RemoveArrayItems comes from the linked page and is not defined
// here. Presumably it returns the items of the second array that are absent
// from the first -- confirm the argument order before relying on these names.
var newItemsAdded=RemoveArrayItems(oldArray,newArray);
var ItemsRemoved =RemoveArrayItems(newArray,oldArray);

答案 4 :(得分:0)

我认为diff方法的实现是正确的;由于使用了排序,算法的时间复杂度为O(n * log(n))。如果使用数组,则需要在比较之前对它们进行排序,而排序算法的时间复杂度至少为O(n * log(n))。

如果o和n数组不能多次包含相同的值,那么使用对象(关联数组)代替数组可能是更好的选择。

示例:

/**
 * Diffs two sets that are represented as objects (each own key is one member).
 *
 * Only own properties are considered. The `in` operator and a bare for...in
 * also see inherited names, which made members such as "toString" or
 * "constructor" look like they were present in every set.
 *
 * @param {Object} o the old set; null/undefined is treated as empty
 * @param {Object} n the new set; null/undefined is treated as empty
 * @returns {{added: Object, removed: Object}} sets (key -> 1) of the members
 *          found only in `n` (added) and only in `o` (removed)
 */
function diff(o, n) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var oldSet = (o == null) ? {} : o;
    var newSet = (n == null) ? {} : n;
    var a = {}; var r = {};
    var key;

    for (key in oldSet) {
        if (hasOwn.call(oldSet, key) && !hasOwn.call(newSet, key)) {
            r[key] = 1;
        }
    }
    for (key in newSet) {
        if (hasOwn.call(newSet, key) && !hasOwn.call(oldSet, key)) {
            a[key] = 1;
        }
    }
    return {added: a, removed: r};
}

    // how to use
// Sample sets: every key is a member, the value 1 is only a placeholder.
var o = {"a":1, "b":1, "ab":1};
var n = {"a":1, "aa":1};

var diffon = diff (o, n);

// Flatten each result set into a "key, key, " list for display.
var added = "", removed = "";
var i;
for (i in diffon.added) {
    added = added.concat (i, ", ");
}
for (i in diffon.removed) {
    removed = removed.concat (i, ", ");
}

alert ("added: " + added);
alert ("removed: " + removed);

那种情况下的时间复杂度是O(n)。

如果您需要有关JavaScript中的数组,关联数组的更多详细信息,我建议您使用以下链接: Array object

答案 5 :(得分:0)

// I prefer to not sort the arrays

/**
 * Returns the items of this array that have no counterpart in `ar`.
 * Each item of `ar` can cancel only one item, so duplicates are counted.
 * Neither array is modified; `ar` is copied before items are struck off.
 */
Array.prototype.diff = function (ar) {
    var unmatched = [];
    var pool = ar.concat();
    for (var k = 0, count = this.length; k < count; k++) {
        var item = this[k];
        var pos = pool.indexOf(item);
        if (pos == -1) {
            unmatched.push(item);
        }
        else {
            // strike the counterpart off so it cannot match a second time
            pool.splice(pos, 1);
        }
    }
    return unmatched;
};

/**
 * Compares this array (the old list) with `a2` (the new list).
 * Returns {r: items only in this array, a: items only in a2}, both computed
 * with Array.prototype.diff.
 */
Array.prototype.compare = function (a2) {
    var removed = this.diff(a2);
    var added = a2.diff(this);
    return {r: removed, a: added};
};

/* 
test

var a1= [-1, 2, 3, 3, 4, 5], a2= [0, 2, 4, 3, 5, 6, 8];
var o= a1.compare(a2);

alert('added: '+o.a+'\nremoved: '+o.r);


returned:

added: 0, 6, 8
removed: -1, 3

*/


// oldbrowser code:

/**
 * Fallback for Array.prototype.indexOf on engines that lack it.
 *
 * @param {*} what value to look for (compared with ===)
 * @param {number} [i] index to start from; a negative value counts back from
 *        the end of the array, as the native method does
 * @returns {number} index of the first match at or after the start, or -1
 */
function arrayIndexOfPolyfill(what, i){
    var list= Object(this);
    var L= list.length >>> 0;
    // truncate the start index to an integer; a missing or NaN start means 0
    i= Number(i) || 0;
    i= i< 0 ? Math.ceil(i) : Math.floor(i);
    // a negative start is an offset from the end, never a negative position
    if(i< 0) i= Math.max(L+ i, 0);
    while(i< L){
        // skip holes in sparse arrays instead of reading them as undefined
        if(i in list && list[i]=== what) return i;
        ++i;
    }
    return -1;
}

if(![].indexOf){
    Array.prototype.indexOf= arrayIndexOfPolyfill;
}

// Return common values instead of differences
/**
 * Returns the values this array shares with `ar`.
 * Each item of `ar` can be matched only once, so a value appears in the
 * result as many times as it occurs in both arrays. Neither array is modified.
 *
 * @param {Array} ar the array to intersect with
 * @returns {Array} the shared values, in the order they occur in this array
 */
Array.prototype.incommon= function(ar){
    var a= [], i= 0, L= this.length, a2= ar.concat(), t1, t2;
    // stop early once every item of the copy has been matched
    while(i<L && a2.length){
        t1= this[i++];
        t2= a2.indexOf(t1);
        if(t2 != -1){
            // splice returns an array of the removed items; push the value
            // itself, otherwise the result becomes [[2], [3]] instead of [2, 3]
            a.push(a2.splice(t2, 1)[0]);
        }
    }
    return a;
};

答案 6 :(得分:0)

我使用这个功能:

/**
 * Computes the ordered list of edits that turns `from` into `to`.
 *
 * Each edit is {type: 'insert' | 'delete', index, value}. The index refers to
 * the array as it looks after all earlier edits have been applied, so the
 * edits must be applied in the order returned.
 *
 * @param {Array} from the starting array (not modified)
 * @param {Array} to the target array (not modified)
 * @returns {Array} the edits, in application order; empty when the arrays match
 */
function diffArray(from, to){
    var result = [];
    var fromValue, fromIndex, fromCount, fromOffset;
    var toValue, toIndex, toCount, toMap, positions, position;

    /*
    Build an index of `to` (value -> ascending list of positions) to speed up
    the "does this value occur later in `to`?" test. Keys are strings, so the
    number 1 and the string '1' share an entry, and objects share whatever
    their toString returns. The index is only a hint; equality is still
    decided with === below.
    The map has no prototype: with a plain {} a value such as "toString" or
    "constructor" would hit an inherited property and throw.
    */
    toMap = to.reduce(function(map, value, index){
        if(value in map) map[value].push(index);
        else map[value] = [index];
        return map;
    }, Object.create(null));
    toIndex = 0;
    toCount = to.length;

    // fromOffset = inserts so far minus deletes so far, so that
    // fromIndex + fromOffset is the current position in the edited array
    fromOffset = 0;
    fromIndex = 0;
    fromCount = from.length;

    /*
    loop until we reach the end of one of the two arrays
    */
    while(fromIndex < fromCount && toIndex < toCount){
        fromValue = from[fromIndex];
        toValue = to[toIndex];

        /*
        when the two values are equal, no transformation is required.
        */
        if(fromValue === toValue){
            fromIndex++;
            toIndex++;
            continue;
        }

        /*
        If fromValue still occurs in the remainder of `to`, keep it for later
        and insert the missing toValue in front of it; otherwise it is gone,
        so delete it. Positions are ascending, so checking the last one is
        enough to know whether any lies at or after toIndex.
        */
        positions = toMap[fromValue];
        if(positions !== undefined && positions[positions.length - 1] >= toIndex){
            result.push({
                type: 'insert'
                , index: fromIndex + fromOffset
                , value: toValue
            });
            toIndex++;
            fromOffset++;
        }
        else{
            result.push({
                type: 'delete'
                , index: toIndex
                , value: fromValue
            });
            fromIndex++;
            fromOffset--;
        }
    }

    /*
    At most one of the two arrays still has items left.
    Leftover `from` items are deletions; each one removes the item that has
    moved into the same position, so the index stays constant.
    */
    position = fromIndex + fromOffset;
    for(; fromIndex < fromCount; fromIndex++){
        result.push({
            type: 'delete'
            , index: position
            , value: from[fromIndex]
        });
    }

    /*
    Leftover `to` items are insertions at their final position.
    */
    for(; toIndex < toCount; toIndex++){
        result.push({
            type: 'insert'
            , index: toIndex
            , value: to[toIndex]
        });
    }

    return result;

}//diffArray

查看存储库以获取更新和单元测试: https://github.com/elmerbulthuis/diff-array

答案 7 :(得分:0)

用于关联数组的PHP解决方案,将插入/更新/删除拆分为单独的数组:

/**
 * compute differences between associative arrays.
 * the inserts, updates, deletes are returned
 * in separate arrays. Note: To combine arrays
 * safely preserving numeric keys, use $a + $b
 * instead of array_merge($a, $b).
 *
 * inserts: entries whose key exists only in $a1.
 * updates: entries whose key exists in both arrays with
 *          different values; the value from $a1 is returned.
 * deletes: entries whose key exists only in $a2.
 *
 * Values are compared by array_diff_assoc(), i.e. by their
 * string representation.
 *
 * Author: Monte Ohrt <monte@ohrt.com>
 * Version: 1.0
 * Date: July 17th, 2014
 *
 * @param array $a1
 * @param array $a2
 * @return array ($inserts, $updates, $deletes)
 */
function get_array_differences($a1, $a2) {
    // get diffs forward and backward: entries of one array that have
    // no identical key/value pair in the other
    $forward = array_diff_assoc($a1, $a2);
    $backward = array_diff_assoc($a2, $a1);
    // compute arrays: a key that differs in both directions exists on
    // both sides with another value; a key in one direction only is
    // missing from the other array
    $inserts = array_diff_key($forward, $backward);
    $updates = array_intersect_key($forward, $backward);
    $deletes = array_diff_key($backward, $forward);
    return array($inserts, $updates, $deletes);
}

https://gist.github.com/mohrt/f7ea4e2bf2ec8ba7542c