我有一个包含重复数据的数组。我想根据出现频率对其进行排序(重复次数最多的元素排在最前面),然后删除重复项。
我尝试了下面的代码,但它输出的顺序与预期不符。
// Sample records; several entries repeat the same name / sys_id pair.
var arr = [
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" }
];

// Print a name -> occurrence-count map. Note that this only counts;
// it neither sorts the records nor removes the duplicates.
console.log(
  arr.reduce((tally, { name }) => {
    if (!tally[name]) {
      // First time this name is seen.
      tally[name] = 1;
    } else {
      tally[name]++;
    }
    return tally;
  }, {})
);
答案 0 :(得分:1)
不是最佳解决方案,但您可以将其作为起点:
// Sample records; several entries repeat the same name / sys_id pair.
var arr = [
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" }
];

// Map of name -> number of records carrying that name, with keys added in
// the order each name is first encountered.
const noOfOccurencies = {};
for (const { name } of arr) {
  if (noOfOccurencies[name]) {
    noOfOccurencies[name]++;
  } else {
    noOfOccurencies[name] = 1;
  }
}
console.log(noOfOccurencies);
// Order an array of objects based on another array order
// https://gist.github.com/ecarter/1423674
/**
 * Sorts `array` in place so that its elements follow the order in which
 * their `key` values appear in `order`, and returns the same array.
 *
 * Elements whose `key` value does not appear in `order` are placed first.
 * Elements with the same position in `order` keep their relative order.
 *
 * @param {Object[]} array - Objects to reorder (mutated).
 * @param {Array} order - Values of `key` in the desired order.
 * @param {string} key - Property of each element to look up in `order`.
 * @returns {Object[]} The same `array` instance, now sorted.
 */
function mapOrder(array, order, key) {
  // Position of the first occurrence of every value in `order`, built once
  // so each comparison is a constant-time lookup instead of two linear scans.
  const rank = new Map();
  order.forEach((value, index) => {
    if (!rank.has(value)) {
      rank.set(value, index);
    }
  });
  // Unknown values get -1, the same position indexOf would report.
  const rankOf = (element) => {
    const value = element[key];
    return rank.has(value) ? rank.get(value) : -1;
  };
  // A numeric difference yields 0 for ties, which keeps the comparator
  // consistent and lets the sort preserve the original order of equal items.
  array.sort((a, b) => rankOf(a) - rankOf(b));
  return array;
}
// Names ranked from most to least frequent: sort ascending by count, then
// flip the result (*highest duplicates element first*).
const itemOrder = Object.keys(noOfOccurencies)
  .sort((left, right) => noOfOccurencies[left] - noOfOccurencies[right])
  .reverse();
console.log(itemOrder);

// Rearrange the original records so they follow the ranking in itemOrder.
let ordered_array = mapOrder(arr, itemOrder, 'name');

// Keep only the first record found for each name.
const noDuplicatesArr = ordered_array.filter((record, position, all) => {
  const firstPosition = all.findIndex((candidate) => candidate.name === record.name);
  return firstPosition === position;
});
console.log(noDuplicatesArr);
答案 1 :(得分:1)
// Sample records; several entries repeat the same name / sys_id pair.
let arr = [
  {
    name: "Manage AAA Devices",
    sys_id: "7b491aad371adb003ef7a9c2b3990e22",
  },
  {
    name: "",
    sys_id: "",
  },
  {
    name: "Manage AAA - ISE Admin Functions",
    sys_id: "dc5f99a2dbb2b74019d81ffa68961933",
  },
  {
    name: "Manage AAA Devices",
    sys_id: "7b491aad371adb003ef7a9c2b3990e22",
  },
  {
    name: "Manage AAA - ISE Admin Functions",
    sys_id: "dc5f99a2dbb2b74019d81ffa68961933",
  },
  {
    name: "SNOW Change Request",
    sys_id: "325b08913783d6c4f4f4c97a43990e90",
  },
  {
    name: "",
    sys_id: "",
  },
  {
    name: "Manage AAA - ISE Admin Functions",
    sys_id: "dc5f99a2dbb2b74019d81ffa68961933",
  },
  {
    name: "Manage AAA - ISE Admin Functions",
    sys_id: "dc5f99a2dbb2b74019d81ffa68961933",
  },
  {
    name: "Incident Request",
    sys_id: "3ee543f61b363740f713ed7b2f4bcbc0",
  },
];
/**
 * Groups structurally identical items and reports how often each distinct
 * item occurs, most frequent first.
 *
 * Two items belong to the same group when their `JSON.stringify` output is
 * identical, so objects with the same properties in a different order are
 * counted separately.
 *
 * @param {Object[]} arrayWithDuplicates - Items to count; each is expected
 *   to have a `name` property.
 * @returns {{name: *, frequency: number}[]} One entry per distinct item,
 *   ordered by descending frequency. Items with equal frequency appear in
 *   reverse order of first appearance.
 */
function sortByFrequency(arrayWithDuplicates) {
  // Single pass: serialise each item once and tally it under its JSON text,
  // remembering the first item seen for that text.
  const groups = new Map();
  arrayWithDuplicates.forEach((item) => {
    const signature = JSON.stringify(item);
    const group = groups.get(signature);
    if (group) {
      group.frequency++;
    } else {
      groups.set(signature, { first: item, frequency: 1 });
    }
  });

  // Ascending sort followed by reverse (rather than a descending sort) is
  // deliberate: it determines the order of entries with equal frequency.
  return [...groups.values()]
    .sort((a, b) => a.frequency - b.frequency)
    .reverse()
    .map(({ first, frequency }) => ({ name: first.name, frequency }));
}
// Print the name and frequency of each distinct entry, most frequent first.
console.log(sortByFrequency(arr));
答案 2 :(得分:1)
我认为最简单的方法是:在构建计数字典时,把原始项目也一并保存在字典里,之后直接用这个字典来排序。这样字典中每个键只对应一个项目,同时还记录了该项目的出现次数。
因此,您只需略微修改reduce函数:
/**
 * Builds a dictionary keyed by `item[prop]` that stores, for each distinct
 * key, the first item seen with that key and how many items share it.
 *
 * @param {Object[]} arr - Items to count.
 * @param {string} prop - Name of the property whose value identifies an item.
 * @returns {Object<string, {count: number, item: Object}>} Plain object
 *   mapping each distinct key to its count and first item.
 */
function reduceWithCount( arr, prop ) {
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    // Test for an OWN entry: a plain truthiness check would also see members
    // inherited from Object.prototype, so a key such as "constructor" or
    // "toString" would be treated as already counted.
    if (!Object.prototype.hasOwnProperty.call( agg, key )) {
      // defineProperty creates a real own property even for "__proto__",
      // where plain assignment would invoke the prototype setter instead.
      Object.defineProperty( agg, key, {
        value: { count: 0, item },
        writable: true,
        enumerable: true,
        configurable: true
      } );
    }
    agg[key].count++;
    return agg;
  }, {});
}
然后根据记录的频率创建一个新数组:把字典的键映射回先前保存的原始项目,并按字典中记录的计数从高到低排序:
/**
 * Turns a counted dictionary into an array of its stored items, ordered
 * from the highest count to the lowest.
 *
 * @param {Object<string, {count: number, item: Object}>} countedDictionary -
 *   Dictionary whose entries hold a `count` and the `item` it belongs to.
 * @param {string} prop - Property of each item whose value is the item's
 *   key in `countedDictionary`.
 * @returns {Object[]} The stored items, most frequent first.
 */
function createArrayFromObject( countedDictionary, prop ) {
  // Look up how often an item occurred through the key stored in item[prop].
  const countOf = (entry) => countedDictionary[entry[prop]].count;

  const items = [];
  for (const key of Object.keys( countedDictionary )) {
    items.push( countedDictionary[key].item );
  }
  items.sort( (first, second) => countOf( second ) - countOf( first ) );
  return items;
}
不过,这意味着使用这些函数时需要两次传入属性名(键),但我认为这点开销并不大。
我稍微修改了代码段,使其不再使用您的原始数组,而是随机生成一个数组,以观察它处理50000个条目时的表现。
只要原始数组中没有50000个不同的名称,使用简单的字典查找就可以了。
// Lookup of name -> id used to generate random sample records.
const seed = {
  'Manage AAA Devices': '7b491aad371adb003ef7a9c2b3990e22',
  '': '',
  'Manage AAA - ISE Admin Functions': 'dc5f99a2dbb2b74019d81ffa68961933',
  'SNOW Change Request': '325b08913783d6c4f4f4c97a43990e90',
  'Incident Request': '3ee543f61b363740f713ed7b2f4bcbc0'
};
const seedKeys = Object.keys( seed );
// Build 50000 records, each with a name drawn at random from seedKeys.
const arr = Array.from( { length: 50000 }, () => {
  // Math.floor, not parseInt: parseInt converts its argument to a string
  // first, so a tiny value such as 5e-7 becomes "5e-7" and parses as 5,
  // which is outside the range of valid indexes.
  const name = seedKeys[Math.floor( Math.random() * seedKeys.length )];
  return { name, sysId: seed[name] };
} );
console.log(`array generated with ${arr.length} items`);
/**
 * Builds a dictionary keyed by `item[prop]` that stores, for each distinct
 * key, the first item seen with that key and how many items share it.
 *
 * @param {Object[]} arr - Items to count.
 * @param {string} prop - Name of the property whose value identifies an item.
 * @returns {Object<string, {count: number, item: Object}>} Plain object
 *   mapping each distinct key to its count and first item.
 */
function reduceWithCount( arr, prop ) {
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    // Test for an OWN entry: a plain truthiness check would also see members
    // inherited from Object.prototype, so a key such as "constructor" or
    // "toString" would be treated as already counted.
    if (!Object.prototype.hasOwnProperty.call( agg, key )) {
      // defineProperty creates a real own property even for "__proto__",
      // where plain assignment would invoke the prototype setter instead.
      Object.defineProperty( agg, key, {
        value: { count: 0, item },
        writable: true,
        enumerable: true,
        configurable: true
      } );
    }
    agg[key].count++;
    return agg;
  }, {});
}
/**
 * Turns a counted dictionary into an array of its stored items, ordered
 * from the highest count to the lowest.
 *
 * @param {Object<string, {count: number, item: Object}>} countedDictionary -
 *   Dictionary whose entries hold a `count` and the `item` it belongs to.
 * @param {string} prop - Property of each item whose value is the item's
 *   key in `countedDictionary`.
 * @returns {Object[]} The stored items, most frequent first.
 */
function createArrayFromObject( countedDictionary, prop ) {
  // Look up how often an item occurred through the key stored in item[prop].
  const countOf = (entry) => countedDictionary[entry[prop]].count;

  const items = [];
  for (const key of Object.keys( countedDictionary )) {
    items.push( countedDictionary[key].item );
  }
  items.sort( (first, second) => countOf( second ) - countOf( first ) );
  return items;
}
// Time both steps together: counting the items per name, then building the
// de-duplicated array ordered by descending count.
console.time('removeAndSort');
const removedDuplicates = reduceWithCount( arr, 'name' );
const sortedArray = createArrayFromObject( removedDuplicates, 'name' );
console.timeEnd('removeAndSort');
// removedDuplicates is the name -> { count, item } dictionary;
// sortedArray holds one item per name, most frequent first.
console.log( removedDuplicates );
console.log( sortedArray );