我有一份包含几百万条记录的 JSON 数据,需要实现一个看似简单但有些棘手的功能。
我需要做的是:
从 JSON 中找出出现次数最多的前十个项目。所谓“出现次数最多”,是指该项目在 JSON 中的计数最高。我还不确定该如何保存计数,也许可以把它作为一个属性添加到同一个 JSON 对象中。
这是我到目前为止所做的。
// My original JSON is too big to show in full, so only a portion of it is included here.
var jsonData = [
  { id: "5", name: "#5" },
  { id: "1", name: "#1" },
  { id: "2", name: "#2" },
  { id: "8", name: "#8" },
  { id: "1", name: "#1" },
  { id: "10", name: "#10" },
  { id: "2", name: "#2" },
];
// Receives the results pushed by getTop10Data().
var top10Data = [];
// Getting the top 10 items.
/**
 * Collects the most frequent items into `top10Data`, one `getTop1Data()`
 * result per pass.
 *
 * The loop stops as soon as `limit` results have been collected or `jsonData`
 * is empty, so `getTop1Data()` is never asked for an item when there is
 * nothing left to inspect. (The previous `while (i <= 20)` loop ran 21 passes.)
 *
 * @param {number} [limit=10] - Maximum number of results to collect.
 * @returns {boolean} Always `true`.
 */
function getTop10Data(limit = 10) {
  for (let collected = 0; collected < limit && jsonData.length > 0; collected++) {
    top10Data.push(getTop1Data());
  }
  return true;
}
// Getting the single record that has the highest count in the JSON data.
/**
 * Finds the most frequent record in `jsonData`, hands it to `removeData` so it
 * is taken out of the data set, and returns it.
 *
 * Records are counted by their serialized content. Using the record object
 * itself as a property key does not work: every plain object is coerced to
 * the same key, "[object Object]", so all records would share one counter.
 *
 * When several records share the highest count, the one that reached that
 * count first while scanning `jsonData` from the start is chosen.
 *
 * @returns {Array<Object>} A one-element array holding the most frequent
 *   record, or an empty array when `jsonData` is empty.
 */
function getTop1Data() {
  const distribution = new Map();
  let max = 0;
  let result = [];
  for (const record of jsonData) {
    const key = JSON.stringify(record);
    const count = (distribution.get(key) || 0) + 1;
    distribution.set(key, count);
    // Strictly greater: the first record to reach the highest count stays the winner.
    if (count > max) {
      max = count;
      result = [record];
    }
  }
  // Remove this item with all its occurrences; the caller pushes it to top10Data.
  if (result.length > 0) {
    removeData(result);
  }
  return result;
}
// Remove every occurrence of an item from the original JSON data.
/**
 * Removes from `jsonData`, in place, every record whose serialized content
 * equals that of `result[0]`.
 *
 * The previous version had three problems: it compared a `toppings` property
 * that the sample records do not have, it passed the element itself (not its
 * index) to `splice`, which therefore always removed the first element, and it
 * iterated up to the original length while the array was shrinking.
 *
 * The array is compacted in a single pass instead of calling `splice` per
 * match, so the cost stays linear in the number of records.
 *
 * @param {Array<Object>} result - Array whose first element is the record to remove.
 * @returns {undefined}
 */
function removeData(result) {
  if (!result || result.length === 0) {
    return;
  }
  const target = JSON.stringify(result[0]);
  let kept = 0;
  for (let i = 0; i < jsonData.length; i++) {
    if (JSON.stringify(jsonData[i]) !== target) {
      jsonData[kept] = jsonData[i];
      kept++;
    }
  }
  // Truncate the tail left over after shifting the kept records forward.
  jsonData.length = kept;
}
我的问题。
我觉得自己的做法不太合适,是否有更好的方法来处理这种情况?如果我的思路可行,那么当前代码中还缺少什么?
如能提供帮助,我将不胜感激。
答案 0 :(得分:1)
您可以用 reduce 把数据归约为一个对象:以序列化(JSON.stringify)后的项目作为键,保存每个项目的出现次数。然后按出现次数对条目进行 sort 降序排序,
再用 slice 取前 10 个(如果唯一对象不足 10 个,则全部返回)。
// Sample input: a small portion of the data set.
var jsonData = [
  { id: "5", name: "#5" },
  { id: "1", name: "#1" },
  { id: "2", name: "#2" },
  { id: "8", name: "#8" },
  { id: "1", name: "#1" },
  { id: "10", name: "#10" },
  { id: "2", name: "#2" },
];

// Tally how often each record occurs, keyed by the record's JSON text.
const counts = {};
for (const obj of jsonData) {
  const key = JSON.stringify(obj);
  counts[key] = (counts[key] || 0) + 1;
}

// Order the keys from most to least frequent, keep at most ten of them,
// and turn each remaining key back into a record paired with its count.
const result = Object.keys(counts)
  .sort((first, second) => counts[second] - counts[first])
  .slice(0, 10)
  .map((string) => {
    return { count: counts[string], obj: JSON.parse(string) };
  });
console.log(result);
要将计数添加到原始数据中,请在构造好 counts 对象之后
再遍历一次数据:
// Sample input: a small portion of the data set.
var jsonData = [
  { id: "5", name: "#5" },
  { id: "1", name: "#1" },
  { id: "2", name: "#2" },
  { id: "8", name: "#8" },
  { id: "1", name: "#1" },
  { id: "10", name: "#10" },
  { id: "2", name: "#2" },
];

// First pass: tally how often each record occurs, keyed by the record's JSON text.
const counts = {};
jsonData.forEach((obj) => {
  const key = JSON.stringify(obj);
  counts[key] = (counts[key] || 0) + 1;
});

// Second pass: attach the tally to every record. The key is computed before
// `count` is assigned, so it matches the key used during the first pass.
for (const item of jsonData) {
  const key = JSON.stringify(item);
  item.count = counts[key];
}
console.log(jsonData);
答案 1 :(得分:1)
我实现了一套可行的逻辑,步骤如下:先按 name 对数据排序;再遍历已排序的数组,统计相邻同名项目的数量;最后按计数降序排序并取前 10 个。
下面附上示例代码,请查看最终结果。
// Sample input: a small portion of the data set.
var jsonData = [
  { id: "5", name: "#5" },
  { id: "1", name: "#1" },
  { id: "2", name: "#2" },
  { id: "8", name: "#8" },
  { id: "1", name: "#1" },
  { id: "10", name: "#10" },
  { id: "2", name: "#2" },
];

/**
 * Collapses an array that is already sorted by `name` into one entry per run
 * of equal names.
 *
 * The end of the array is detected by index rather than by comparing against
 * a sentinel value, and names are compared strictly. The earlier sentinel `0`
 * combined with loose `!=` treated a final name of "0" or "" as "same as the
 * next one" and silently dropped the last group.
 *
 * @param {Array<{id: *, name: *}>} sorted - Records ordered so equal names are adjacent.
 * @returns {Array<{id: *, name: *, count: number}>} One entry per run, carrying
 *   the `id` of the run's last record and the run length as `count`.
 */
function countSortedByName(sorted) {
  const groups = [];
  let total = 1;
  for (let i = 0; i < sorted.length; i++) {
    const currentName = sorted[i].name;
    const endsRun = i === sorted.length - 1 || sorted[i + 1].name !== currentName;
    if (endsRun) {
      groups.push({
        id: sorted[i].id,
        name: currentName,
        count: total,
      });
      total = 1;
    } else {
      total += 1;
    }
  }
  return groups;
}

// Step 0: show the raw data.
$('#output1').html(JSON.stringify(jsonData));

// Step 1: sort in place by name so identical names become adjacent.
jsonData.sort((a, b) => {
  if (a.name < b.name) {
    return -1;
  }
  return a.name > b.name ? 1 : 0;
});
$('#output2').html(JSON.stringify(jsonData));

// Step 2: count each run of identical names.
let newArray = countSortedByName(jsonData);
$('#output3').html(JSON.stringify(newArray));

// Step 3: sort again by count, highest first, and take the top 10.
newArray.sort((a, b) => b.count - a.count);
newArray = newArray.slice(0, 10); // Here is your data
$('#output4').html(JSON.stringify(newArray));
<!-- jQuery supplies the $ helper the script uses to fill the paragraphs below. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- Output targets: raw data, data sorted by name, per-name counts, top 10 by count. -->
<p id="output1"></p>
<p id="output2"></p>
<p id="output3"></p>
<p id="output4"></p>