改进重复查找器和循环性能

时间:2017-04-28 12:08:23

标签: javascript arrays object for-loop duplicates

我写了一个脚本,用于在对象数组中查找某个键的重复值。如果找到重复项,则会给所有具有该重复键值的对象添加一个新的键值对("Duplicate": true)。

数据示例

{
    "Id": "1",
    "NI Number": "NG111111A",
    "Full Name": "Test Test Tester",
    "Address Line 1": "My House",
    "Address Line 2": "My Road",
    "Address Line 3": "My Suburb",
    "City / Town": "My Town",
    "Country": "United Kingdom",
    "PostCode": "",
    "Creation Date": "24 December 2014"
},
{
    "Id": "2",
    "NI Number": "NM123405C",
    "Full Name": "A Dummy",
    "Address Line 1": "Dummy 1",
    "Address Line 2": "Dummy 2",
    "Address Line 3": "Dummy 3",
    "City / Town": "Dummy 4",
    "Country": "United Kingdom",
    "PostCode": "",
    "Creation Date": "09 February 2015"
}

脚本

// Flags every record whose "NI Number" also appears on another record.
// Each record that is not yet flagged as a duplicate is compared against
// all records that come after it in the array.
for (let outer = 0, total = cleanedData.length; outer < total; outer++) {

    // Skip records that already carry a flag from an earlier pass.
    // NOTE(review): the loop itself only ever writes booleans to "Duplicate",
    // so the "false" string comparison can only match records that arrive
    // with that string already set.
    if (cleanedData[outer]["Duplicate"] !== "false" && cleanedData[outer]["Duplicate"] !== undefined) {
        continue;
    }

    // Becomes true as soon as any later record shares this NI Number
    let hasTwin = false;

    for (let inner = outer + 1, total = cleanedData.length; inner < total; inner++) {

        if (cleanedData[inner]["NI Number"] !== cleanedData[outer]["NI Number"]) {
            continue;
        }

        hasTwin = true;
        cleanedData[inner]["Duplicate"] = true;
    }

    // Record the outcome on the record that was being checked
    cleanedData[outer]["Duplicate"] = hasTwin;

}

我试图在 33,000 条记录中找出重复的 "NI Number"。同一个 NI Number 可能重复出现多次。该脚本目前能按预期工作,但运行时间超过 70 秒。如果可能,我想把它减少到 35 秒。

我是 JavaScript 新手,但据我读到的资料,使用缓存了长度的 for 循环是遍历数组的一种快速方法。我也读到过 Map 和 Set 可以提高性能,但我不知道如何在我的脚本中使用它们。

有没有办法提高我的代码的性能?

1 个答案:

答案 0 :(得分:0)

您还没有提供任何基准测试工具,所以希望以下内容有所帮助。

您可以减少判断的次数,并尽可能少地进行属性查找。此外,您的部分判断有误:您赋的是布尔值,判断时比较的却是字符串。希望代码中的注释足以说明问题。

&#13;
&#13;
// Sample trimmed to the fields the check actually needs, with extra rows
var cleanedData = [
  {"Id": "1","NI Number": "NG111111A"},
  {"Id": "2","NI Number": "NM123405C"}, // Duplicate
  {"Id": "3","NI Number": "NM123405D"},
  {"Id": "4","NI Number": "NM123405E"}, // Duplicate
  {"Id": "5","NI Number": "NM123405C"}, // Duplicate
  {"Id": "4","NI Number": "NM123405E"}, // Duplicate
  {"Id": "4","NI Number": "NM123405F"},
  {"Id": "4","NI Number": "NM123405E"}  // Duplicate
];

// var is kept deliberately for older hosts; nothing here needs block scope
for (var outer = 0, count = cleanedData.length; outer < count; outer++) {

  // Cache the record under test so the array is indexed once per pass
  var current = cleanedData[outer];

  // A record that already has a Duplicate flag was dealt with by an
  // earlier pass, so there is nothing left to check for it
  if ('Duplicate' in current) {
    continue;
  }

  // Assume unique until a match is found further down the array
  current.Duplicate = false;

  // count is reused here: the array length is not re-read for the inner scan
  for (var inner = outer + 1; inner < count; inner++) {

    // Cache the record being compared against
    var candidate = cleanedData[inner];

    // On a match, flag both records in one go
    if (candidate['NI Number'] === current['NI Number']) {
      current.Duplicate = true;
      candidate.Duplicate = true;
    }
  }
}

console.log(cleanedData);
&#13;
&#13;
&#13;

请告诉我它是否有助于提高性能。您也可以使用 forEach 之类的数组方法,但我认为 for 循环至少同样快,在某些宿主环境中还要快得多。

按照 Salman A 的建议实现一个索引,大致如下。它更简洁(好吧,只比第一种方法少一行),而且可能很快,因为它只遍历一次数组,不过确实会多出很多对 index 的写入操作。

&#13;
&#13;
var cleanedData = [
  {"Id": "1","NI Number": "NG111111A"},
  {"Id": "2","NI Number": "NM123405C"}, // Duplicate
  {"Id": "3","NI Number": "NM123405D"},
  {"Id": "4","NI Number": "NM123405E"}, // Duplicate
  {"Id": "5","NI Number": "NM123405C"}, // Duplicate
  {"Id": "4","NI Number": "NM123405E"}, // Duplicate
  {"Id": "4","NI Number": "NM123405F"}, 
  {"Id": "4","NI Number": "NM123405E"}  // Duplicate
 ];
 
// Maps each NI Number value to the array position where it was first found.
// Created without a prototype so that inherited names such as "constructor"
// or "toString" are never mistaken for values that were already indexed
// (with a plain {} the `in` test below would match them and the lookup
// into cleanedData would yield undefined).
var index = Object.create(null);

// Single pass: every record is checked against the index exactly once
for (var i=0, iLen=cleanedData.length; i<iLen; i++) {
  var obj = cleanedData[i];
  var value = obj['NI Number'];

  // If obj doesn't have Duplicate property, add as false
  if (!('Duplicate' in obj)) obj.Duplicate = false;

  // Value seen before: flag this record and the one that first carried it
  if (value in index) {
    obj.Duplicate = true;
    cleanedData[index[value]].Duplicate = true;
    
   // First occurrence: remember where it was found
  } else {
    index[value] = i;
  }
}

console.log(cleanedData);
&#13;
&#13;
&#13;