如何在关联数组中合并重复项?

时间:2019-08-31 13:50:27

标签: javascript replace merge duplicates associative-array

找出JavaScript数组是否包含重复项并将其合并为新数组的最简洁,最有效的方法是什么?

我尝试了Lodash / d3 / underscoreJs,但是它们都不产生干净的结果,所以我尝试了此代码->

// Sample input: one record per article source. Some titles differ only by
// letter case ("A DUPLICATE TITLE") or by a space before the colon
// ("Other duplicate : title").
var arr = [
    {
        "title": "My unique title",
        "link": "domainlinkto-my-unique-title",
        "image": "someurlto/my-unique-title-image",
        "date": "Mon, 29 Jul 2019 02:25:08 -0000",
        "site": "site1"
    },
    {
        "title": "A duplicate title",
        "link": "somedomainlinkto-a-duplicate-title/",
        "image": "randomurlto/a-duplicate-title.jpg",
        "date": "Sun, 25 Aug 2019 15:52:59 -0000",
        "site": "site1"
    },
    {
        "title": "A duplicate title",
        "link": "otherdomainlinkto-a-duplicate-title/",
        "image": "anotherurlto/duplicate-title.jpg",
        "date": "Sun, 25 Aug 2019 21:09:37 -0000",
        "site": "site2"
    },
    {
        "title": "A DUPLICATE TITLE",
        "link": "someotherdomainlinkto-a-duplicate-title/",
        "image": "someurlto/aduplicatetitle.jpg",
        "date": "Sat, 24 Aug 2019 18:43:38 -0000",
        "site": "site3"
    },
    {
        "title": "Other duplicate: title",
        "link": "anydomainlinkto-other-duplicate-title/",
        "image": "anotherdomainurlto/other-duplicate-title.jpg",
        "date": "Mon, 26 Aug 2019 00:37:28 -0000",
        "site": "site2"
    },
    {
        "title": "Other duplicate : title",
        "link": "anyotherdomainlinkto-other-duplicate-title/",
        "image": "exampleurlto/hjKGHK45huu.jpg",
        "date": "Mon, 26 Aug 2019 00:37:28 -0000",
        "site": "site5"
    },
    {
        "title": "Other unique title",
        "link": "anydomainlinkto-other-unique-title/",
        "image": "anyotherurlto/img/other-title.jpg",
        "date": "Mon, 26 Aug 2019 09:18:10 -0000",
        "site": "site3"
    }
        ];
    /**
     * Groups the array's objects by the listed properties and merges each
     * group into a single record.
     *
     * Two elements belong to the same group when every property named in
     * `props` has the same value. The comparison is exact, so values that
     * differ in letter case or spacing end up in different groups.
     *
     * @param {string[]} props - Names of the properties that form the group key.
     * @returns {Object[]} One object per group, in first-seen order, holding the
     *   key properties, `links` (the `{link, site, date}` of every member) and
     *   `image` (the image of the first member of the group).
     */
    Array.prototype.groupBy = function (props) {
        var partialResult = {};
        var imgResult = {};
        this.forEach(el => {
            // Build the group key from the requested properties only.
            var grpObj = {};
            props.forEach(prop => {
                grpObj[prop] = el[prop];
            });
            var key = JSON.stringify(grpObj);
            if (!partialResult[key]) {
                // First member of the group: it also supplies the group image.
                partialResult[key] = [];
                imgResult[key] = el.image;
            }
            partialResult[key].push({
                link: el.link,
                site: el.site,
                date: el.date
            });
        });
        // The key is the JSON of the grouping properties, so parsing it gives
        // back the base object for each merged record.
        return Object.keys(partialResult).map(key => {
            var keyObj = JSON.parse(key);
            keyObj.links = partialResult[key];
            keyObj.image = imgResult[key];
            return keyObj;
        });
    };

    // Group the sample records by their `title` property and print the merged list.
    var filtered = arr.groupBy(['title']);
    console.log(filtered);

但是……如您所见,全大写的标题(“A DUPLICATE TITLE”)和冒号前多了一个空格的标题(“Other duplicate : title”)没有被视为重复项。

我该怎么做--->

// Desired result for the sample `arr`: one entry per title, where titles that
// differ only by letter case or by spacing count as the same title. Each entry
// keeps the title and image of the first record in its group and lists every
// source under `links`.
var expected = [
    {
        "title": "My unique title",
        "links": [
            {"date": "Mon, 29 Jul 2019 02:25:08 -0000", "site": "site1", "link": "domainlinkto-my-unique-title"}
        ],
        "image": "someurlto/my-unique-title-image"
    },
    {
        "title": "A duplicate title",
        "links": [
            {"date": "Sun, 25 Aug 2019 15:52:59 -0000", "site": "site1", "link": "somedomainlinkto-a-duplicate-title/"},
            {"date": "Sun, 25 Aug 2019 21:09:37 -0000", "site": "site2", "link": "otherdomainlinkto-a-duplicate-title/"},
            {"date": "Sat, 24 Aug 2019 18:43:38 -0000", "site": "site3", "link": "someotherdomainlinkto-a-duplicate-title/"}
        ],
        "image": "randomurlto/a-duplicate-title.jpg"
    },
    {
        "title": "Other duplicate: title",
        "links": [
            {"date": "Mon, 26 Aug 2019 00:37:28 -0000", "site": "site2", "link": "anydomainlinkto-other-duplicate-title/"},
            {"date": "Mon, 26 Aug 2019 00:37:28 -0000", "site": "site5", "link": "anyotherdomainlinkto-other-duplicate-title/"}
        ],
        "image": "anotherdomainurlto/other-duplicate-title.jpg"
    },
    {
        "title": "Other unique title",
        "links": [
            {"date": "Mon, 26 Aug 2019 09:18:10 -0000", "site": "site3", "link": "anydomainlinkto-other-unique-title/"}
        ],
        "image": "anyotherurlto/img/other-title.jpg"
    }
];
console.log(expected);

嗨,各位高手:

找出JavaScript数组是否包含重复项并将其合并为新数组的最简洁,最有效的方法是什么?

我尝试了Lodash / d3 / underscoreJs,但是它们都不产生干净的结果,所以我尝试了此代码->

// Sample input: one record per article source. Some titles differ only by
// letter case ("A DUPLICATE TITLE") or by a space before the colon
// ("Other duplicate : title").
var arr = [
    {
        "title": "My unique title",
        "link": "domainlinkto-my-unique-title",
        "image": "someurlto/my-unique-title-image",
        "date": "Mon, 29 Jul 2019 02:25:08 -0000",
        "site": "site1"
    },
    {
        "title": "A duplicate title",
        "link": "somedomainlinkto-a-duplicate-title/",
        "image": "randomurlto/a-duplicate-title.jpg",
        "date": "Sun, 25 Aug 2019 15:52:59 -0000",
        "site": "site1"
    },
    {
        "title": "A duplicate title",
        "link": "otherdomainlinkto-a-duplicate-title/",
        "image": "anotherurlto/duplicate-title.jpg",
        "date": "Sun, 25 Aug 2019 21:09:37 -0000",
        "site": "site2"
    },
    {
        "title": "A DUPLICATE TITLE",
        "link": "someotherdomainlinkto-a-duplicate-title/",
        "image": "someurlto/aduplicatetitle.jpg",
        "date": "Sat, 24 Aug 2019 18:43:38 -0000",
        "site": "site3"
    },
    {
        "title": "Other duplicate: title",
        "link": "anydomainlinkto-other-duplicate-title/",
        "image": "anotherdomainurlto/other-duplicate-title.jpg",
        "date": "Mon, 26 Aug 2019 00:37:28 -0000",
        "site": "site2"
    },
    {
        "title": "Other duplicate : title",
        "link": "anyotherdomainlinkto-other-duplicate-title/",
        "image": "exampleurlto/hjKGHK45huu.jpg",
        "date": "Mon, 26 Aug 2019 00:37:28 -0000",
        "site": "site5"
    },
    {
        "title": "Other unique title",
        "link": "anydomainlinkto-other-unique-title/",
        "image": "anyotherurlto/img/other-title.jpg",
        "date": "Mon, 26 Aug 2019 09:18:10 -0000",
        "site": "site3"
    }
        ];
    /**
     * Groups the array's objects by the listed properties and merges each
     * group into a single record.
     *
     * Two elements belong to the same group when every property named in
     * `props` has the same value. The comparison is exact, so values that
     * differ in letter case or spacing end up in different groups.
     *
     * @param {string[]} props - Names of the properties that form the group key.
     * @returns {Object[]} One object per group, in first-seen order, holding the
     *   key properties, `links` (the `{link, site, date}` of every member) and
     *   `image` (the image of the first member of the group).
     */
    Array.prototype.groupBy = function (props) {
        var partialResult = {};
        var imgResult = {};
        this.forEach(el => {
            // Build the group key from the requested properties only.
            var grpObj = {};
            props.forEach(prop => {
                grpObj[prop] = el[prop];
            });
            var key = JSON.stringify(grpObj);
            if (!partialResult[key]) {
                // First member of the group: it also supplies the group image.
                partialResult[key] = [];
                imgResult[key] = el.image;
            }
            partialResult[key].push({
                link: el.link,
                site: el.site,
                date: el.date
            });
        });
        // The key is the JSON of the grouping properties, so parsing it gives
        // back the base object for each merged record.
        return Object.keys(partialResult).map(key => {
            var keyObj = JSON.parse(key);
            keyObj.links = partialResult[key];
            keyObj.image = imgResult[key];
            return keyObj;
        });
    };

    // Group the sample records by their `title` property and print the merged list.
    var filtered = arr.groupBy(['title']);
    console.log(filtered);

但是……如您所见,全大写的标题(“A DUPLICATE TITLE”)和冒号前多了一个空格的标题(“Other duplicate : title”)没有被视为重复项。

我该怎么做--->

// Desired output: one entry per title, with every source of that title
// collected under "links".
[
    {
        "title": "My unique title",
        "links": [{"date": "Mon, 29 Jul 2019 02:25:08 -0000","site": "site1", "link": "domainlinkto-my-unique-title"}],
        "image": "someurlto/my-unique-title-image",
    },
    {
        // NOTE(review): the sample input spells this title "A duplicate title",
        // not "My duplicate title" — confirm which is intended.
        "title": "My duplicate title",
        "links": [
	    {"date": "Sun, 25 Aug 2019 15:52:59 -0000","site": "site1","link":"somedomainlinkto-a-duplicate-title/"},
	    {"date": "Sun, 25 Aug 2019 21:09:37 -0000","site": "site2","link": "otherdomainlinkto-a-duplicate-title/"},
	    {"date": "Sat, 24 Aug 2019 18:43:38 -0000","site": "site3","link": "someotherdomainlinkto-a-duplicate-title/"}
	    ],
        "image": "randomurlto/a-duplicate-title.jpg",
    },
    {
        "title": "Other duplicate: title",
        "links": [
	    // NOTE(review): the input record for this link is dated
	    // "Mon, 26 Aug 2019 00:37:28 -0000" — the date below looks copied from another record.
	    {"date": "Sun, 25 Aug 2019 15:52:59 -0000","site": "site2","link":"anydomainlinkto-other-duplicate-title/"},
	    {"date": "Mon, 26 Aug 2019 00:37:28 -0000","site": "site5","link": "anyotherdomainlinkto-other-duplicate-title/"}
	    ],
        "image": "anotherdomainurlto/other-duplicate-title.jpg",
    },
    {
        "title": "Other unique title",
        // NOTE(review): the input record for this link has site "site3", not "site1" — confirm.
        "links": [{"date": "Mon, 26 Aug 2019 09:18:10 -0000","site": "site1", "link": "anydomainlinkto-other-unique-title/"}],
        "image": "anyotherurlto/img/other-title.jpg",
        // NOTE(review): no other entry has a top-level "site"; presumably a leftover — confirm.
        "site": "site3"
    }
];

我敢肯定这不是最好的方法(这一点我们都同意),所以想请教 Stack Overflow 上的各位高手……

感谢您的阅读和花时间思考我的问题

2 个答案:

答案 0 :(得分:1)

在生成用于分组的键之前,我会先把标题转成小写并去掉空格。另外,我会用对象解构和一个哈希表让代码更简洁;既然属性本来就是硬编码在里面的,我看不出写一个“通用”的 Array.prototype.groupBy 有什么意义:

  /**
   * Merges records whose titles differ only by letter case or whitespace.
   *
   * The first record of each group supplies the merged entry's `title` and
   * `image`; the `{ link, date, site }` of every record in the group is
   * appended to its `links` array.
   *
   * @param {Iterable<{title: string, link: *, image: *, date: *, site: *}>} input
   *   Records to merge.
   * @returns {Array<{title: string, image: *, links: Object[]}>} Merged entries
   *   in first-seen order.
   */
  function mergeByTitle(input) {
    // Maps a normalized title to the `links` array of its merged entry.
    const linksByKey = new Map();
    const result = [];

    for (const { title, link, image, date, site } of input) {
      // Normalize: ignore letter case and every kind of whitespace.
      const key = title.toLowerCase().replace(/\s+/g, "");
      const links = linksByKey.get(key);
      if (links) {
        links.push({ link, date, site });
      } else {
        const firstLinks = [{ link, date, site }];
        linksByKey.set(key, firstLinks);
        result.push({ title, image, links: firstLinks });
      }
    }

    return result;
  }

答案 1 :(得分:0)

看起来您是想先把标题转成小写并去掉空格,然后再按标题分组:

// Sample records; some titles differ only by letter case or by spaces.
var arr = [
  {"title":"My unique title","link":"domainlinkto-my-unique-title","image":"someurlto/my-unique-title-image","date":"Mon, 29 Jul 2019 02:25:08 -0000","site":"site1"},
  {"title":"A duplicate title","link":"somedomainlinkto-a-duplicate-title/","image":"randomurlto/a-duplicate-title.jpg","date":"Sun, 25 Aug 2019 15:52:59 -0000","site":"site1"},
  {"title":"A duplicate title","link":"otherdomainlinkto-a-duplicate-title/","image":"anotherurlto/duplicate-title.jpg","date":"Sun, 25 Aug 2019 21:09:37 -0000","site":"site2"},
  {"title":"A DUPLICATE TITLE","link":"someotherdomainlinkto-a-duplicate-title/","image":"someurlto/aduplicatetitle.jpg","date":"Sat, 24 Aug 2019 18:43:38 -0000","site":"site3"},
  {"title":"Other duplicate: title","link":"anydomainlinkto-other-duplicate-title/","image":"anotherdomainurlto/other-duplicate-title.jpg","date":"Mon, 26 Aug 2019 00:37:28 -0000","site":"site2"},
  {"title":"Other duplicate : title","link":"anyotherdomainlinkto-other-duplicate-title/","image":"exampleurlto/hjKGHK45huu.jpg","date":"Mon, 26 Aug 2019 00:37:28 -0000","site":"site5"},
  {"title":"Other unique title","link":"anydomainlinkto-other-unique-title/","image":"anyotherurlto/img/other-title.jpg","date":"Mon, 26 Aug 2019 09:18:10 -0000","site":"site3"}
];

// Index the merged entries by the title lowercased with spaces removed. The
// first record seen for a key supplies the entry's title and image.
var result = {};
arr.forEach(({ title, image, link, date, site }) => {
  const key = title.toLowerCase().replace(/ /g, '');
  const entry = result[key] || { title, image, links: [] };
  result[key] = entry;
  entry.links.push({ date, site, link });
});

console.log(Object.values(result));


如果它也必须在IE中工作:

// Sample records; some titles differ only by letter case or by spaces.
var arr = [
  {"title":"My unique title","link":"domainlinkto-my-unique-title","image":"someurlto/my-unique-title-image","date":"Mon, 29 Jul 2019 02:25:08 -0000","site":"site1"},
  {"title":"A duplicate title","link":"somedomainlinkto-a-duplicate-title/","image":"randomurlto/a-duplicate-title.jpg","date":"Sun, 25 Aug 2019 15:52:59 -0000","site":"site1"},
  {"title":"A duplicate title","link":"otherdomainlinkto-a-duplicate-title/","image":"anotherurlto/duplicate-title.jpg","date":"Sun, 25 Aug 2019 21:09:37 -0000","site":"site2"},
  {"title":"A DUPLICATE TITLE","link":"someotherdomainlinkto-a-duplicate-title/","image":"someurlto/aduplicatetitle.jpg","date":"Sat, 24 Aug 2019 18:43:38 -0000","site":"site3"},
  {"title":"Other duplicate: title","link":"anydomainlinkto-other-duplicate-title/","image":"anotherdomainurlto/other-duplicate-title.jpg","date":"Mon, 26 Aug 2019 00:37:28 -0000","site":"site2"},
  {"title":"Other duplicate : title","link":"anyotherdomainlinkto-other-duplicate-title/","image":"exampleurlto/hjKGHK45huu.jpg","date":"Mon, 26 Aug 2019 00:37:28 -0000","site":"site5"},
  {"title":"Other unique title","link":"anydomainlinkto-other-unique-title/","image":"anyotherurlto/img/other-title.jpg","date":"Mon, 26 Aug 2019 09:18:10 -0000","site":"site3"}
];

// ES5-only variant: index the merged entries by the title with spaces removed
// and lowercased. The first record seen for a key supplies title and image.
var result = {};
arr.forEach(function (item) {
  var key = item.title.replace(/ /g, '').toLowerCase();
  var group = result[key];
  if (!group) {
    group = { title: item.title, image: item.image, links: [] };
    result[key] = group;
  }
  group.links.push({ date: item.date, site: item.site, link: item.link });
});

// Object.values is not used here; list the entries via Object.keys instead.
console.log(
  Object.keys(result).map(function (key) {
    return result[key];
  })
);