我有一个包含重复元素的数组。有没有一种高效的算法,可以按元素的重复次数对数组进行排序?例如:
['d@me.com', 'z@gmail.com', 'e@me.com', 'b@me.com', 'c@me.com', 'z@gmail.com', 'z@gmail.com', 'b@me.com', 'e@me.com']
=>
['z@gmail.com', 'e@me.com', 'b@me.com', 'd@me.com', 'c@me.com']
因为计数如下[3, 2, 2, 1, 1]
我想出了:
const itemCounts = {}
const ordereditems = []
for (let i = 0; i < allitems.length; i++) {
let item = allitems[i];
itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
}
const tuples = []
for (let key in itemCounts) {
tuples.push([key, itemCounts[key]])
}
return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])
它的复杂度是 Θ(3N + N log N)?
也许 lodash 里有更快的办法?
或者在计数的同时维护一个有序的优先级队列?
或者使用某种基数排序?
答案 0 :(得分:2)
这是我的尝试,并首次尝试编写代码片段:)
// Sample input shared by the functions and the timing code in this snippet.
const allitems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];

/**
 * The approach from the question: tally every item, turn the tally into
 * [item, count] pairs and sort the pairs by count, highest first.
 *
 * @param {Array} allitems - Items to rank; each item is used as an object key.
 * @returns {string[]} The distinct items (as property-key strings), most
 *   frequent first.
 */
function original_code(allitems) {
  // Null-prototype dictionary: inherited names such as "constructor" must not
  // be mistaken for an existing tally.
  const itemCounts = Object.create(null);
  for (let i = 0; i < allitems.length; i++) {
    const item = allitems[i];
    itemCounts[item] = (itemCounts[item] || 0) + 1;
  }
  const tuples = [];
  for (const key in itemCounts) {
    tuples.push([key, itemCounts[key]]);
  }
  // The comparator must return a number; the boolean `a[1] < b[1]` is never
  // negative, so it cannot order the pairs.
  return tuples.sort((a, b) => b[1] - a[1]).map((x) => x[0]);
}
/**
 * Ranks the distinct items of `allitems` by how often they occur.
 *
 * Each item is pushed to the result the first time it is seen, while a
 * dictionary keeps the running count; the result is then sorted by count.
 *
 * @param {Array} allitems - Items to rank; each item is used as an object key.
 * @returns {Array} The distinct items, most frequent first.
 */
function myTry(allitems) {
  // Null-prototype dictionary, queried with `in`: an item named
  // "hasOwnProperty" or "__proto__" is then just another key instead of
  // shadowing the lookup method or being silently dropped.
  const dic = Object.create(null);
  const arr = [];
  for (let i = 0; i < allitems.length; i++) {
    const item = allitems[i];
    if (item in dic) {
      dic[item]++;
    } else {
      dic[item] = 1;
      arr.push(item);
    }
  }
  // Numeric comparator: highest count first.
  arr.sort((a, b) => dic[b] - dic[a]);
  return arr;
}
//measure attempts

/**
 * Calls `fn(allitems)` repeatedly and logs the elapsed wall-clock time and the
 * result of the last call, both prefixed with `label`.
 *
 * @param {string} label - Prefix for the two log lines.
 * @param {Function} fn - Function under measurement; receives the `allitems`
 *   array declared earlier in this snippet.
 * @param {number} [iterations=10000] - How many times `fn` is called.
 */
function measure(label, fn, iterations = 10000) {
  const t0 = performance.now();
  let res;
  for (let j = 0; j < iterations; j++) {
    res = fn(allitems);
  }
  const t1 = performance.now();
  console.log(`${label} ${t1 - t0} milliseconds.`);
  console.log(`${label} ${res}`);
}

// Each candidate is timed twice, in mirrored order (original, myTry, myTry,
// original) - presumably so that neither one only ever runs first.
measure("original", original_code);
measure("myTry", myTry);
measure("myTry2", myTry);
measure("original2", original_code);
**修改**
我试图让测量更可靠。如果有更好的方法,请告诉我,我会很感激。
另外还修改了排序。
答案 1 :(得分:1)
使用 array#reduce 创建一个频率对象,记录数组中每个元素及其出现次数。然后按出现次数对这些记录排序,最后取出所有元素。
const arr = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];

// `frequency` maps every distinct value to a { val, count } record.
const frequency = {};
for (const val of arr) {
  if (!frequency[val]) {
    frequency[val] = { val, count: 0 };
  }
  frequency[val].count += 1;
}

// Order the records by count, highest first, then keep only the values.
const byCountDescending = (a, b) => b.count - a.count;
const result = Object.values(frequency)
  .sort(byCountDescending)
  .map((record) => record.val);

console.log(result);
&#13;
答案 2 :(得分:0)
我没有测量过耗时。但这里有一个思路:先用 filter 函数生成去重后的 uniqueitems 数组,再用它把 itemCounts 中各项的计数初始化为 0,这样计数循环里就不需要分支判断了。
const allitems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];
// Null-prototype dictionary so that no item collides with an inherited name.
const itemCounts = Object.create(null);

// Filter out unique item list early; the Set makes each membership check
// constant-time instead of rescanning the array with indexOf.
const seen = new Set();
const uniqueitems = allitems.filter((v) => {
  if (seen.has(v)) {
    return false;
  }
  seen.add(v);
  return true;
});

// Set item counts of unique items to zero. This must walk `uniqueitems`:
// the first uniqueitems.length entries of `allitems` may contain repeats,
// which would leave some counts uninitialised (and later NaN).
for (let i = 0; i < uniqueitems.length; i++) {
  itemCounts[uniqueitems[i]] = 0;
}

// Count occurrences in allitems; no branch is needed because every key
// already exists.
for (let i = 0; i < allitems.length; i++) {
  itemCounts[allitems[i]]++;
}

// Sort unique items directly on their count, highest first. The comparator
// must return a number, not a boolean. sort() works in place, so
// `ordereditems` refers to the same array as `uniqueitems`.
const ordereditems = uniqueitems.sort((a, b) => itemCounts[b] - itemCounts[a]);
console.log(ordereditems);
答案 3 :(得分:0)
我的直觉是,对于这个问题,对数线性(O(n log n))已经是最优的了;但实际上,借助哈希表(JS 中的对象)的魔力,我想出了下面这个(摊销)O(n) 的实现思路,这让我自己也感到惊讶!其代价是占用更多空间。你需要一个双向链表的实现。我在这里临时手写了一个,不过可能有更合适的库。
printList 函数用于调试;如果你想了解其工作原理,它可能会有帮助。
/**
 * Orders the distinct items of `allitems` by how often they occur, most
 * frequent first, in one pass and without a comparison sort.
 *
 * A doubly linked list holds one node per distinct item and is kept ordered
 * by count (descending) after every input element. Nodes with the same count
 * form a contiguous block; `countIndex` remembers the first node of each
 * block, so a node whose count grows can be moved with a constant number of
 * pointer updates.
 *
 * Ties: an item that reaches a count later is placed in front of the items
 * that already have that count; items seen exactly once keep first-seen order.
 *
 * @param {Array} allitems - Items to rank; each item is used as an object key.
 * @returns {Array} The distinct items, most frequent first.
 */
function orderByCount(allitems) { // allitems is the array to be 'count-sorted'
  // linkedList has at most one node per item and is ordered by count (seen so
  // far) descending. It is used to build the result.
  let linkedListHead = null;
  let linkedListTail = null;
  // itemIndex[k] is the node for item k, or undefined if k has not been seen.
  // Null-prototype so that items named like Object.prototype members are safe.
  const itemIndex = Object.create(null);
  // countIndex[x] is the first (headmost) node whose count is exactly x, or
  // undefined when no node currently has count x.
  const countIndex = Object.create(null);

  for (let i = 0; i < allitems.length; i++) {
    const item = allitems[i];
    const linkedNode = itemIndex[item];

    if (linkedNode === undefined) {
      // New item: count 1 is the smallest possible count, so it belongs at
      // the tail of the list.
      const newNode = {
        item: item,
        count: 1,
        previous: linkedListTail,
        next: null
      };
      if (linkedListTail) {
        linkedListTail.next = newNode;
      } else {
        linkedListHead = newNode;
      }
      linkedListTail = newNode;
      itemIndex[item] = newNode;
      if (countIndex[1] === undefined) {
        countIndex[1] = newNode;
      }
      continue;
    }

    const oldCount = linkedNode.count;
    const newCount = oldCount + 1;

    // The node has to end up directly in front of `target`: the head of the
    // block it is joining, or - when no such block exists yet - the head of
    // the block it is leaving (which may be the node itself).
    const target = countIndex[newCount] !== undefined
      ? countIndex[newCount]
      : countIndex[oldCount];

    // If the node was the head of its old block, its successor takes over,
    // provided the successor really belongs to that block.
    if (countIndex[oldCount] === linkedNode) {
      const successor = linkedNode.next;
      countIndex[oldCount] =
        successor !== null && successor.count === oldCount ? successor : undefined;
    }

    linkedNode.count = newCount;
    countIndex[newCount] = linkedNode;

    if (target === linkedNode) {
      // Already in the right place; only the bookkeeping above was needed.
      continue;
    }

    // Unlink the node from its current position...
    if (linkedNode.previous) {
      linkedNode.previous.next = linkedNode.next;
    } else {
      linkedListHead = linkedNode.next;
    }
    if (linkedNode.next) {
      linkedNode.next.previous = linkedNode.previous;
    } else {
      linkedListTail = linkedNode.previous;
    }

    // ...and splice it back in directly in front of `target`.
    linkedNode.previous = target.previous;
    linkedNode.next = target;
    if (target.previous) {
      target.previous.next = linkedNode;
    } else {
      linkedListHead = linkedNode;
    }
    target.previous = linkedNode;
  }

  // turn it into a normal array as per specification
  const result = [];
  let current = linkedListHead;
  while (current != null) {
    result.push(current.item);
    current = current.next;
  }
  return result;
}
/**
 * Debug helper: logs every node of the linked list, from head to tail, one
 * line per node, followed by an empty line.
 *
 * Each line is the JSON of { item, count }, with " (HEAD)" / " (TAIL)"
 * appended for the first / last node and " <-- N" appended when
 * countIndex[N] is that very node (N being the node's count).
 *
 * @param {?Object} linkedListHead - First node of the list, or null.
 * @param {?Object} linkedListTail - Last node of the list, or null.
 * @param {Object} countIndex - Lookup from count to node, read as
 *   countIndex[node.count].
 */
function printList(linkedListHead, linkedListTail, countIndex) {
  let current = linkedListHead;
  while (current != null) {
    // Declared locally: the undeclared assignment used before created a
    // global and throws a ReferenceError in strict mode / ES modules.
    let toLog = JSON.stringify({
      item: current.item,
      count: current.count
    });
    if (linkedListHead === current) {
      toLog += " (HEAD)";
    }
    if (linkedListTail === current) {
      toLog += " (TAIL)";
    }
    if (countIndex[current.count] === current) {
      toLog += " <-- " + current.count;
    }
    console.log(toLog);
    current = current.next;
  }
  console.log();
}
// Demo: count-sort a sample list and print the resulting order.
const allItems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];
const orderedItems = orderByCount(allItems);
console.log(orderedItems);
答案 4 :(得分:0)
我认为这是一个更好的解决方案:用 arr.concat 代替排序。
请注意,对象中的整数键是按升序遍历的:
/**
 * Ranks the distinct items of `allitems` by occurrence count without calling
 * sort(): items are grouped into buckets keyed by their count, the buckets
 * are concatenated in ascending key order (integer-like object keys are
 * enumerated in ascending numeric order) and the whole list is reversed.
 *
 * Because of the final reverse(), items that share a count come out in the
 * reverse of the order in which they were put into their bucket.
 *
 * @param {Array} allitems - Items to rank; each item is used as an object key.
 * @returns {string[]} The distinct items (as property-key strings), most
 *   frequent first.
 */
const f = (allitems) => {
  // Null-prototype dictionary: with a plain {} an item such as "constructor"
  // would find the inherited property and corrupt its own tally.
  const itemCounts = Object.create(null);
  for (let i = 0; i < allitems.length; i++) {
    const item = allitems[i];
    itemCounts[item] = (itemCounts[item] || 0) + 1;
  }
  // map[count] lists every item that occurs exactly `count` times.
  const map = Object.create(null);
  for (const key in itemCounts) {
    const count = itemCounts[key];
    if (!map[count]) {
      map[count] = [];
    }
    map[count].push(key);
  }
  // Buckets are visited from the lowest count to the highest...
  let ordereditems = [];
  for (const key in map) {
    ordereditems = ordereditems.concat(map[key]);
  }
  // ...so reversing puts the most frequent items first.
  return ordereditems.reverse();
};
console.log(f(['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e']));
我没有用很多用例测试过,所以它可能不够健壮。
答案 5 :(得分:-1)
// Sorts the array literal in place by the UTF-16 code unit of each string's
// first character (charCodeAt() without an argument reads index 0).
// NOTE(review): the comparator does not look at occurrence counts, and the
// sorted array is neither assigned nor printed here.
['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'].sort((x,y)=> x.charCodeAt()-y.charCodeAt())