按频率计数订购电子邮件地址数组

时间:2018-01-30 02:01:12

标签: javascript algorithm sorting

我有一个带有一些重复的数组。一种基于重复计数对阵列进行排序的有效算法,例如:

['d@me.com', 'z@gmail.com', 'e@me.com', 'b@me.com', 'c@me.com', 'z@gmail.com', 'z@gmail.com', 'b@me.com', 'e@me.com']
=>
['z@gmail.com', 'e@me.com', 'b@me.com', 'd@me.com', 'c@me.com']

因为计数如下[3, 2, 2, 1, 1]

我想出了:

const itemCounts = {}
const ordereditems = []
for (let i = 0; i < allitems.length; i++) {
  let item = allitems[i];
  itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
}
const tuples = []
for (let key in itemCounts) {
  tuples.push([key, itemCounts[key]])
}
return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])

哪个是Θ(3 N + N log N)

也许用lodash更快的东西?

在计算时可能会保留一个排序的优先级队列?

也许使用某种基数类型?

6 个答案:

答案 0 :(得分:2)

这是我的尝试,并首次尝试编写代码片段:)

&#13;
&#13;
var allitems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];

function original_code(allitems) {
  const itemCounts = {}
  const ordereditems = []
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];
    itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
  }
  const tuples = []
  for (let key in itemCounts) {
    tuples.push([key, itemCounts[key]])
  }
  return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])
}

function myTry(allitems) {
  var arr;
  const dic = {};
  arr = [];
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];
    if (!dic.hasOwnProperty(item)) {
      dic[item] = 1;
      arr.push(item);
    } else
      dic[item]++;
  }
  arr.sort((a, b) => dic[b] - dic[a]);
  return arr;
}



//measure attempts
{ //original code
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = original_code(allitems);
  }
  var t1 = performance.now();
  console.log("original " + (t1 - t0) + " milliseconds.");
  console.log("original " + res);
}

{ //my try
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = myTry(allitems);
  }
  var t1 = performance.now();
  console.log("myTry " + (t1 - t0) + " milliseconds.");
  console.log("myTry " + res);
}



{ //my try
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = myTry(allitems);
  }
  var t1 = performance.now();
  console.log("myTry2 " + (t1 - t0) + " milliseconds.");
  console.log("myTry2 " + res);
}

{ //original code
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = original_code(allitems);
  }
  var t1 = performance.now();
  console.log("original2 " + (t1 - t0) + " milliseconds.");
  console.log("original2 " + res);
}
&#13;
&#13;
&#13;

修改
我试图做出更可靠的测量。如果有更好的方法,如果你告诉我,我会很感激。

+ 改变了排序。

答案 1 :(得分:1)

使用array#reduce创建一个频率对象,其中包含数组中此单词的字和频率。然后根据对象值对值进行排序,然后取出所有单词。

&#13;
&#13;
var arr = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'],
    frequency = arr.reduce((r,val) => {
      r[val] = r[val] || {val, count : 0};
      r[val].count = r[val].count + 1;
      return r;
    },{});
var result = Object.values(frequency).sort((a,b) => {
  return b.count - a.count;
}).map(({val}) => val);
console.log(result);
&#13;
&#13;
&#13;

答案 2 :(得分:0)

我没有时间测量过。 但是这里有一个想法,使用过滤器函数,创建一个uniqueitems项目数组,然后使用它来填充itemcounts数组,从而从计数循环中删除分支。

const allitems= ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];
const itemCounts = {};
var uniqueitems = [];
var ordereditems = [];

// Filter out unique item list early
uniqueitems=allitems.filter( (v,i,s) => s.indexOf(v) === i );

// Set items counts of unique items to zero,
for (let i = 0; i < uniqueitems.length; i++) {
  let item = allitems[i];
  itemCounts[item] =0;
}

// Count occurences in allitems
for (let i = 0; i < allitems.length; i++) {
  let item = allitems[i];
  itemCounts[item]  ++;
}

// Sort unique items,directly based on their itemCount
ordereditems=uniqueitems.sort((a, b) => itemCounts[a] < itemCounts[b]);

console.log(ordereditems);

答案 3 :(得分:0)

我直觉地期望log-linear对于这个算法规范来说是最优的,但事实上这里是(摊销的)O(n)实现的要点我通过思考使用哈希表的魔力(JS中的对象)让我感到惊讶)!这是以占用更多空间为代价的。您需要一个双向链表实现。我会在这里破解一个,但可能有更适合的库。

printList函数用于调试,如果您有兴趣了解其工作原理,那么它可能很有用。

function orderByCount(allitems) { // allitems is the array to be 'count-sorted'
  // linkedList will have at most one node per item. It will be used to build the result.
  // It will be ordered by count (seen so far) descending
  let linkedListHead = null
  let linkedListTail = null

  // if linkedList has a node for key k, itemIndex[k] is that node
  // if linkedList has no node for key k, itemIndex[k] is undefined
  let itemIndex = {}

  // for all x >= 1 if linkedList has a node with seen count <= x, countIndex[x] is the 'headmost' (first) node whose count <= x.
  // for all x >= 1 if linkedList has no node with seen count <= x, countIndex[x] is undefined
  let countIndex = {}

  // iterate over the input and maintain above invariants
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];

    if (itemIndex.hasOwnProperty(item)) {
      // we've already seen this item; update while preserving invariants
      const linkedNode = itemIndex[item]

      // first, remove the node, preserving invariants
      if (countIndex[linkedNode.count] === linkedNode) {
        countIndex[linkedNode.count] = linkedNode.next
      }
      if (linkedNode.previous) {
        linkedNode.previous.next = linkedNode.next
      } else {
        linkedListHead = linkedNode.next
      }
      if (linkedNode.next) {
        linkedNode.next.previous = linkedNode.previous
      } else {
        linkedListTail = linkedNode.previous
      }
      linkedNode.next = linkedNode.previous = null

      // now update the node
      linkedNode.count++

        // and add it back where it now belongs, preserving invariants
        if (countIndex.hasOwnProperty(linkedNode.count)) {
          // need to put it at the front of this count-block
          newNext = countIndex[linkedNode.count]
          newPrevious = newNext.previous
          linkedNode.next = newNext
          linkedNode.previous = newPrevious
          newNext.previous = linkedNode
          if (newPrevious) {
            newPrevious.next = linkedNode
          } else {
            linkedListHead = linkedNode
          }
          countIndex[linkedNode.count] = linkedNode
        } else {
          // it's the new greatest; create a new count-block
          countIndex[linkedNode.count] = linkedNode
          if (linkedListHead) {
            linkedListHead.previous = linkedNode
          }
          linkedNode.next = linkedListHead
          linkedListHead = linkedNode
        }
    } else {
      // this is a new item
      const linkedNode = {
        item: item,
        count: 1,
        previous: null,
        next: null
      }

      // First, insert it at the tail
      linkedNode.previous = linkedListTail
      if (linkedListTail) {
        linkedListTail.next = linkedNode
      }
      linkedListTail = linkedNode

      // now index it
      itemIndex[item] = linkedNode
      if (linkedListHead === null) {
        linkedListHead = linkedNode
      }
      if (!countIndex.hasOwnProperty(1)) {
        countIndex[1] = linkedNode
      }
    }
  }

  // turn it into a normal array as per specification 
  const result = []
  let current = linkedListHead
  while (current != null) {
    result.push(current.item)
    current = current.next
  }

  return result
}

function printList(linkedListHead, linkedListTail, countIndex) {
  let current = linkedListHead
  while (current != null) {
    toLog = JSON.stringify({
      item: current.item,
      count: current.count
    })
    if (linkedListHead === current) {
      toLog += " (HEAD)"
    }
    if (linkedListTail === current) {
      toLog += " (TAIL)"
    }
    if (countIndex[current.count] === current) {
      toLog += " <-- " + current.count
    }
    console.log(toLog)
    current = current.next
  }
  console.log()
}

const allItems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e']
console.log(orderByCount(allItems))

答案 4 :(得分:0)

我认为这是一个更好的解决方案,使用arr.concat代替排序。

请注意,对象中的键是有序的:

const f = (allitems) => {
  const itemCounts = {}
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];
    itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
  }
  
  const map = {}
  for (let key in itemCounts) {
    const count = itemCounts[key]
    if(!map[count]) {
      map[count] = []
    }
    map[count].push(key)
  }
  
  let ordereditems = []
  for (let key in map) {
    ordereditems = ordereditems.concat(map[key])
  }
  return ordereditems.reverse()
}
console.log(f(['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e']))

对此没有多少测试用例,所以它可能不太健壮。

答案 5 :(得分:-1)

['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'].sort((x,y)=> x.charCodeAt()-y.charCodeAt())