根据元素的频率对数组进行排序

时间:2020-05-11 07:55:48

标签: javascript jquery angularjs

我有一个包含重复数据的数组。我想根据出现频率对其进行排序(重复次数最多的元素排在最前),然后删除重复项。

我尝试了下面的代码,但它得到的顺序与预期不符。

// Sample input: name/sys_id records, several of which are repeated.
const arr = [
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" }
];

// Log an object mapping each name to the number of times it occurs.
// This only counts; it does not sort or de-duplicate the array.
console.log(arr.reduce((counts, { name }) => {
  counts[name] = counts[name] ? counts[name] + 1 : 1;
  return counts;
}, {}));

3 个答案:

答案 0 :(得分:1)

不是最佳解决方案,但您可以将其作为起点:

// Sample input: name/sys_id records, several of which are repeated.
const arr = [
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
  { name: "", sys_id: "" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
  { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" }
];

// Tally how many times each name occurs; keys are created in first-seen order.
const noOfOccurencies = {};
for (const { name } of arr) {
  if (noOfOccurencies[name]) {
    noOfOccurencies[name] += 1;
  } else {
    noOfOccurencies[name] = 1;
  }
}

console.log(noOfOccurencies);

// Order an array of objects based on another array order
// https://gist.github.com/ecarter/1423674
/**
 * Sorts `array` in place so that its items follow the sequence given by `order`.
 *
 * An item's rank is the index of the first occurrence of `item[key]` in
 * `order`. Items whose value does not appear in `order` get rank -1 and
 * therefore sort before all listed values. Items with the same rank keep
 * their relative order.
 *
 * @param {Object[]} array - items to sort; mutated and returned
 * @param {Array} order - values of `key` in the desired order
 * @param {string} key - property of each item to look up in `order`
 * @returns {Object[]} the same `array` instance, sorted
 */
function mapOrder(array, order, key) {
  // Resolve every rank once up front instead of scanning `order` twice per comparison.
  const rankByValue = new Map();
  order.forEach(function(value, index) {
    if (!rankByValue.has(value)) {
      rankByValue.set(value, index);
    }
  });

  const rankOf = function(item) {
    const value = item[key];
    return rankByValue.has(value) ? rankByValue.get(value) : -1;
  };

  // Subtraction yields 0 for equal ranks, which keeps the comparator
  // consistent; the previous version returned -1 for ties in both directions.
  array.sort(function(a, b) {
    return rankOf(a) - rankOf(b);
  });
  return array;
}

// Rank the distinct names by occurrence count: sort ascending, then flip the
// list so the most frequent name comes first.
const itemOrder = Object.keys(noOfOccurencies)
  .sort((first, second) => noOfOccurencies[first] - noOfOccurencies[second])
  .reverse();

console.log(itemOrder);

// Reorder the original array (in place) so it follows itemOrder.
const ordered_array = mapOrder(arr, itemOrder, 'name');

// Keep an element only when it is the first one in the array carrying its name.
const noDuplicatesArr = ordered_array.filter((candidate, position, all) => {
  const firstMatch = all.findIndex((other) => other.name === candidate.name);
  return firstMatch === position;
});

console.log(noDuplicatesArr);

答案 1 :(得分:1)

// Sample input: name/sys_id records, several of which are repeated
// (including two entries whose name and sys_id are both empty strings).
let arr = [
    {name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22"},
    {name: "", sys_id: ""},
    {name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933"},
    {name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22"},
    {name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933"},
    {name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90"},
    {name: "", sys_id: ""},
    {name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933"},
    {name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933"},
    {name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0"},
];

/**
 * Collapses duplicate items and lists them from most to least frequent.
 *
 * Two items count as duplicates when their JSON serialisations are identical,
 * so every serialisable property takes part in the comparison, not just `name`.
 *
 * @param {Object[]} arrayWithDuplicates - items to analyse; not modified
 * @returns {{name: *, frequency: number}[]} one entry per distinct item,
 *   highest frequency first; among equal frequencies the item that was first
 *   seen later comes first
 */
function sortByFrequency(arrayWithDuplicates) {
    // Single pass: serialise each item once and tally by that signature.
    // A Map iterates in insertion order, so groups stay in first-seen order.
    const groups = new Map();
    arrayWithDuplicates.forEach((item) => {
        const signature = JSON.stringify(item);
        const group = groups.get(signature);
        if (group) {
            group.frequency += 1;
        } else {
            groups.set(signature, {name: item.name, frequency: 1});
        }
    });

    // Ascending sort followed by reverse() deliberately keeps the established
    // ordering of ties (later-seen groups first).
    return [...groups.values()]
        .sort((a, b) => a.frequency - b.frequency)
        .reverse();
}

// Print the distinct entries together with how often each one occurred.
const namesByFrequency = sortByFrequency(arr);
console.log(namesByFrequency);

答案 2 :(得分:1)

我认为最简单的方法可能是在创建字典时把原始项目一并保存下来,之后再用这个字典进行排序。这样字典中每个键只对应 1 个项目,并且同时记录了该项目的出现次数。

因此您将略微更改reduce函数

/**
 * Counts how often each distinct value of `prop` occurs in `arr`.
 *
 * @param {Object[]} arr - items to count
 * @param {string} prop - name of the property whose value identifies an item
 * @returns {Object} dictionary keyed by the property value; every entry is
 *   `{ count, item }`, where `item` is the first item seen with that value
 */
function reduceWithCount( arr, prop ) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    // Test for an own entry: a plain truthiness check would mistake inherited
    // members such as "constructor" or "toString" for existing entries and
    // then write `count` onto them.
    if (!hasOwn.call(agg, key)) {
      // defineProperty rather than assignment so that a key of "__proto__"
      // becomes an ordinary own entry instead of replacing the prototype.
      Object.defineProperty(agg, key, {
        value: { count: 0, item },
        writable: true,
        enumerable: true,
        configurable: true
      });
    }
    agg[key].count++;
    return agg;
  }, {});
}

然后根据记录的频率创建一个新数组,您可以将其映射回去并使用之前从字典对象中保存的计数

/**
 * Turns a counted dictionary back into an array of its stored items, ordered
 * from the highest count to the lowest.
 *
 * @param {Object} countedDictionary - dictionary of `{ count, item }` entries
 * @param {string} prop - property of each item whose value is its dictionary key
 * @returns {Object[]} the stored items, most frequent first
 */
function createArrayFromObject( countedDictionary, prop ) {
  // Look an item's count up again through the value of its `prop` property.
  const countOf = (entry) => countedDictionary[entry[prop]].count;

  const items = [];
  for (const key of Object.keys( countedDictionary )) {
    items.push( countedDictionary[key].item );
  }

  items.sort( (left, right) => countOf(right) - countOf(left) );
  return items;
}

不过,这意味着使用这些函数时,您必须两次传入同一个属性名(键),但我认为这并不是太大的开销。

我稍微修改了代码段,使其不再使用您的原始数组,而是随机生成一个数组,以查看它处理 50,000 个条目时的表现。

只要原始数组中没有50000个不同的名称,使用简单的字典查找就可以了。

// Lookup of name -> sys id used to generate random sample records.
const seed = {
  'Manage AAA Devices': '7b491aad371adb003ef7a9c2b3990e22',
  '': '',
  'Manage AAA - ISE Admin Functions': 'dc5f99a2dbb2b74019d81ffa68961933',
  'SNOW Change Request': '325b08913783d6c4f4f4c97a43990e90',
  'Incident Request': '3ee543f61b363740f713ed7b2f4bcbc0'
};

const seedKeys = Object.keys( seed );

// Build 50000 records, each with a randomly chosen seed name.
// Math.floor is used instead of parseInt: parseInt stringifies its argument
// first, so a very small product such as 5e-7 would be parsed as 5 and index
// past the end of seedKeys.
const arr = Array.from( { length: 50000 }, () => {
  const name = seedKeys[Math.floor(Math.random() * seedKeys.length)];
  return { name, sysId: seed[name] };
} );

console.log(`array generated with ${arr.length} items`);

/**
 * Counts how often each distinct value of `prop` occurs in `arr`.
 *
 * @param {Object[]} arr - items to count
 * @param {string} prop - name of the property whose value identifies an item
 * @returns {Object} dictionary keyed by the property value; every entry is
 *   `{ count, item }`, where `item` is the first item seen with that value
 */
function reduceWithCount( arr, prop ) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    // Test for an own entry: a plain truthiness check would mistake inherited
    // members such as "constructor" or "toString" for existing entries and
    // then write `count` onto them.
    if (!hasOwn.call(agg, key)) {
      // defineProperty rather than assignment so that a key of "__proto__"
      // becomes an ordinary own entry instead of replacing the prototype.
      Object.defineProperty(agg, key, {
        value: { count: 0, item },
        writable: true,
        enumerable: true,
        configurable: true
      });
    }
    agg[key].count++;
    return agg;
  }, {});
}

/**
 * Turns a counted dictionary back into an array of its stored items, ordered
 * from the highest count to the lowest.
 *
 * @param {Object} countedDictionary - dictionary of `{ count, item }` entries
 * @param {string} prop - property of each item whose value is its dictionary key
 * @returns {Object[]} the stored items, most frequent first
 */
function createArrayFromObject( countedDictionary, prop ) {
  // Look an item's count up again through the value of its `prop` property.
  const countOf = (entry) => countedDictionary[entry[prop]].count;

  const items = [];
  for (const key of Object.keys( countedDictionary )) {
    items.push( countedDictionary[key].item );
  }

  items.sort( (left, right) => countOf(right) - countOf(left) );
  return items;
}

// Time the count-then-sort pipeline on the generated array, then print both
// the counted dictionary and the resulting sorted array.
const timerLabel = 'removeAndSort';

console.time(timerLabel);
const removedDuplicates = reduceWithCount( arr, 'name' );
const sortedArray = createArrayFromObject( removedDuplicates, 'name' );
console.timeEnd(timerLabel);

console.log( removedDuplicates );
console.log( sortedArray );