排序JSON数据并获取前n条记录

时间:2018-09-21 07:16:07

标签: javascript jquery sorting optimization

我有一份包含几百万条记录的 JSON 数据,需要实现一个看似简单、实际上有些棘手的功能。

  

我需要做的事情:

我需要从 JSON 中找出出现次数最多的前十个项目。所谓“出现次数最多”,是指该项目在 JSON 中的计数最高。我还不确定该如何保存 count,也许可以把它作为一个属性添加到同一个 JSON 对象中。

  

这是我到目前为止所做的。

// Excerpt of the original JSON; the real data set is too large to show in full.
var jsonData = [
  { "id": "5", "name": "#5" },
  { "id": "1", "name": "#1" },
  { "id": "2", "name": "#2" },
  { "id": "8", "name": "#8" },
  { "id": "1", "name": "#1" },
  { "id": "10", "name": "#10" },
  { "id": "2", "name": "#2" }
];

// Filled by getTop10Data() with the results it collects.
var top10Data = [];

/**
 * Collects up to ten "top" results into the module-level `top10Data` array.
 *
 * Every iteration asks getTop1Data() for the current leader(s); that function
 * is expected to remove what it returns from the data set, so repeated calls
 * walk down the ranking.
 *
 * The previous loop condition (`i <= 20`) ran 21 times, which contradicts the
 * "top 10" contract and kept pushing empty results once the data ran out.
 *
 * @returns {boolean} Always `true`.
 */
function getTop10Data() {
    const limit = 10;

    for (let rank = 0; rank < limit; rank++) {
        const leaders = getTop1Data();

        // An empty result means there is nothing left to rank: stop early
        // instead of padding top10Data with empty arrays.
        if (leaders.length === 0) {
            break;
        }

        top10Data.push(leaders);
    }

    return true;
}

/**
 * Finds the record(s) that occur most often in the module-level `jsonData`.
 *
 * Records are compared by their JSON serialization. The previous version used
 * the record object itself as a property key, which coerces every object to
 * the same string ("[object Object]"), so all records shared one counter and
 * the function always reported the last record.
 *
 * Note: JSON.stringify is sensitive to property order, so two records only
 * count as the same item when their properties appear in the same order.
 *
 * After the scan the winners are handed to removeData() so that a following
 * call can find the next most frequent item.
 *
 * @returns {Array<Object>} One representative record per item that reached
 *     the highest count (several when there is a tie); empty when `jsonData`
 *     is empty.
 */
function getTop1Data() {
    const distribution = new Map();
    let max = 0;
    let result = [];

    jsonData.forEach(function (item) {
        const key = JSON.stringify(item);
        const count = (distribution.get(key) || 0) + 1;
        distribution.set(key, count);

        if (count > max) {
            // New sole leader: discard whatever was tied at the old maximum.
            max = count;
            result = [item];
            return;
        }
        if (count === max) {
            // This item has just caught up with the current leader(s).
            result.push(item);
        }
    });

    // Remove the winners (all their occurrences) from the data set.
    removeData(result);

    return result;
}

/**
 * Removes from the module-level `jsonData` every record that matches one of
 * the records in `result`. The array is modified in place.
 *
 * Fixes over the previous version:
 *  - it compared a `toppings` property that the records do not have, so the
 *    test was `undefined === undefined` and matched every record;
 *  - it passed a record object to `splice` as the start index (coerced to 0),
 *    so it always removed the first element;
 *  - it looped up to the length cached before removal, so it ran past the end
 *    of the shrinking array and threw a TypeError.
 *
 * Records are compared by their JSON serialization, so they match only when
 * they have the same properties in the same order.
 *
 * @param {Array<Object>} result Records whose occurrences should be removed.
 */
function removeData(result) {
    const doomedKeys = new Set(result.map(function (item) {
        return JSON.stringify(item);
    }));

    if (doomedKeys.size === 0) {
        return;
    }

    // Compact in place in a single pass: surviving records are shifted to the
    // front, then the array is truncated. This avoids repeated splice() calls,
    // each of which is linear in the array length.
    let kept = 0;
    for (let i = 0; i < jsonData.length; i++) {
        if (!doomedKeys.has(JSON.stringify(jsonData[i]))) {
            jsonData[kept] = jsonData[i];
            kept++;
        }
    }
    jsonData.length = kept;
}
  

我的问题。

我觉得自己的做法不太合适,是否有更好的方法来处理这种情况?如果我的思路可行,那么当前代码中还缺少什么?

如能提供帮助,不胜感激。

2 个答案:

答案 0 :(得分:1)

您可以用 reduce 将数据归并为一个对象:以序列化(JSON.stringify)后的项目作为键,保存每个项目的出现次数。然后按出现次数对各条目进行 sort 排序,再用 slice 取前 10 个(如果不同的对象不足 10 个,则取到的就是全部)。

var jsonData = [
  { "id": "5", "name": "#5" },
  { "id": "1", "name": "#1" },
  { "id": "2", "name": "#2" },
  { "id": "8", "name": "#8" },
  { "id": "1", "name": "#1" },
  { "id": "10", "name": "#10" },
  { "id": "2", "name": "#2" }
];

// Tally how often each record occurs, keyed by the record's JSON text.
const counts = {};
for (const record of jsonData) {
  const key = JSON.stringify(record);
  counts[key] = (counts[key] || 0) + 1;
}

// Order the [jsonText, count] pairs by count, highest first, keep at most
// ten of them and turn each pair back into { count, obj }.
const result = Object.entries(counts)
  .sort((left, right) => right[1] - left[1])
  .slice(0, 10)
  .map((entry) => {
    const serialized = entry[0];
    const count = entry[1];
    return { count, obj: JSON.parse(serialized) };
  });

console.log(result);

要将计数添加到原始数据,请在构造counts对象之后遍历数据:

var jsonData = [
  { "id": "5", "name": "#5" },
  { "id": "1", "name": "#1" },
  { "id": "2", "name": "#2" },
  { "id": "8", "name": "#8" },
  { "id": "1", "name": "#1" },
  { "id": "10", "name": "#10" },
  { "id": "2", "name": "#2" }
];

// Tally how often each record occurs, keyed by the record's JSON text.
const counts = {};
for (const record of jsonData) {
  const key = JSON.stringify(record);
  counts[key] = (counts[key] || 0) + 1;
}

// Attach the tally to every original record. The key is computed before
// `count` is assigned, so it matches the key used while tallying.
for (const item of jsonData) {
  const key = JSON.stringify(item);
  item.count = counts[key];
}

console.log(jsonData);

答案 1 :(得分:1)

我实现了一套可行的逻辑,步骤如下:

  • 根据名称对数组进行排序
  • 遍历排序后的数组,统计每个名称连续出现的次数,并保存该计数
  • 根据计数再次排序

附加了示例代码。请看一下最终结果。

var jsonData = [
  { "id": "5", "name": "#5" },
  { "id": "1", "name": "#1" },
  { "id": "2", "name": "#2" },
  { "id": "8", "name": "#8" },
  { "id": "1", "name": "#1" },
  { "id": "10", "name": "#10" },
  { "id": "2", "name": "#2" }
];

/**
 * Collapses a list of records that is already sorted by `name` into one
 * entry per run of equal names.
 *
 * The earlier inline loop used the number 0 as an "end of list" marker and
 * compared it with loose `!=`. Because `0 != "0"` and `0 != ""` are both
 * false, a final group named "0" or "" was never emitted. This version checks
 * for the end of the list explicitly and compares names with `!==`.
 *
 * @param {Array<{id: *, name: *}>} sortedRecords Records ordered so that equal
 *     names are adjacent.
 * @returns {Array<{id: *, name: *, count: number}>} One entry per run, in input
 *     order; `id` is taken from the last record of the run.
 */
function countRunsByName(sortedRecords) {
  const runs = [];
  let runLength = 0;

  sortedRecords.forEach((record, index) => {
    runLength += 1;

    const isLast = index === sortedRecords.length - 1;
    // A run ends at the last record or when the following name differs.
    if (isLast || sortedRecords[index + 1].name !== record.name) {
      runs.push({ id: record.id, name: record.name, count: runLength });
      runLength = 0;
    }
  });

  return runs;
}

// Step 0: show the raw data. `.text()` is used instead of `.html()` so that
// record values are never interpreted as markup.
$('#output1').text(JSON.stringify(jsonData));

// Step 1: sort by name so that equal names become adjacent.
jsonData.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

$('#output2').text(JSON.stringify(jsonData));

// Step 2: count each run of equal names.
let newArray = countRunsByName(jsonData);

$('#output3').text(JSON.stringify(newArray));

// Step 3: sort again by count (highest first) and take the top 10.
newArray.sort((a, b) => (a.count > b.count ? -1 : a.count < b.count ? 1 : 0));

newArray = newArray.slice(0, 10); // Here is your data

$('#output4').text(JSON.stringify(newArray));
  
<!-- Loads jQuery 2.1.1 from the Google CDN; the script above relies on `$`. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

<!-- output1: the data as originally declared -->
<p id="output1">

</p>
<!-- output2: the data after sorting by name -->
<p id="output2">

</p>
<!-- output3: one entry per name with its count -->
<p id="output3">

</p>
<!-- output4: the entries sorted by count, limited to the first ten -->
<p id="output4">

</p>