Question

编辑：

上下文：我（从前同事那里）接手了一个流程，它会生成一个通用文件，其中除了其他内容之外，还会创建下面这个项目列表。该列表稍后需要转换为一组无序的链接列表，并保留嵌套层级。

我需要根据 href 属性的值，从以下数组中删除重复项，无论同一个值出现了多少次。

// Sample input: one tag string per element; leading whitespace mirrors the
// nesting level. Several elements share the same href value on purpose.
var array = [
 '<tag href="cheese.html">',
 '<tag href="cheddar.html"></tag>',
 '  <tag href="cheese.html"></tag>',
 '</tag>',
 '<tag href="burger.html">',
 ' <tag href="burger.html">',
 '   <tag href="burger.html"></tag>',
 ' </tag>',
 '</tag>',
 '<tag href="lettuce.html">',
 '  <tag href="lettuce.html">',
 '    <tag href="lettuce.html"></tag>',
 '  </tag>',
 '</tag>',
 '<tag href="tomato.html">',
 '  <tag href="tomato.html"></tag>',
 '  <tag href="tomato.html">',
 '    <tag href="tomato.html"></tag>',
 '    <tag href="tomato.html">',
 '      <tag href="tomato.html"></tag>',
 '      <tag href="tomato.html">',
 '        <tag href="tomato.html"></tag>',
 '      </tag>',
 '    </tag>',
 '  </tag>',
 '</tag>',
];

在删除所有重复项后，它应如下所示：

'<tag href="cheese.html">',
'<tag href="cheddar.html"></tag>',
'</tag>',
'<tag href="burger.html">',
'</tag>',
'<tag href="lettuce.html">',
'</tag>',

在此之后，提取生成无序链接列表所需的信息这一步我不需要帮助。我只需要有人帮我弄清楚如何删除重复项。

Answer 1

了解问题的背景会很有帮助。

此函数返回具有唯一 href 值的所有字符串，但不处理结束标签。删除结束标签将是一项复杂的任务。另外，我非常确定使用正则表达式解析 HTML 不是个好主意。

/**
 * Keeps only the first string for each distinct href value.
 *
 * Strings without an href attribute (for example closing tags) are always
 * kept, so closing tags that belonged to removed elements are NOT cleaned up.
 * Despite its name, the function filters rather than sorts; the input array
 * is left unmodified.
 *
 * @param {string[]} array - Tag strings to de-duplicate.
 * @returns {string[]} A new array without strings whose href was seen before.
 */
function sortByHref (array) {
  // [^"]* stops at the quote that closes the href value. A greedy `.*` would
  // run on to the last quote in the string and swallow any later attributes,
  // making identical hrefs look different.
  var hrefReg = /href="([^"]*)"/;
  // Prototype-less object used as a set, so any href text is a safe key.
  var seen = Object.create(null);
  return array.filter(function (x) {
    var match = hrefReg.exec(x);
    if (match === null) return true;
    var href = match[1];
    if (seen[href] === true) return false;
    seen[href] = true;
    return true;
  });
}

如果你能描述一下你想要实现的目标，那么一定还有别的方法可以解决你的问题。

Answer 2

这是一个刻意写得比较详细的解决方案，以便于理解。我假设对于没有 href 值的标签，只按整个字符串来判断并删除重复项。

// De-duplicates a list of tag strings: tags carrying an href are compared by
// href value, all other strings (e.g. closing tags) by their whole text.
// The first occurrence of each key is kept, in original order.
var arr = [
    '<tag href="cheese.html">',
    '<tag href="cheddar.html"></tag>',
    '  <tag href="cheese.html"></tag>',
    '</tag>',
    '<tag href="burger.html">',
    ' <tag href="burger.html">',
    '   <tag href="burger.html"></tag>',
    ' </tag>',
    '</tag>'
];

// Remove whitespace on both ends of each string so that otherwise identical
// strings with different indentation compare equal.
arr = arr.map(function (tagString) {
    return tagString.trim();
});

// Regex to retrieve the href value from a tag (capture group 1).
// No `g` flag: a global regex keeps state in `lastIndex` between exec() calls,
// which would have to be reset by hand after every use.
var hrefRegexp = /\s+href="([^"]+)"/;

// Prototype-less lookup tables recording which keys were already kept.
var seenHrefs = Object.create(null);
var seenStrings = Object.create(null);

// Single pass: keep an element only the first time its key shows up.
arr = arr.filter(function (tagString) {
    var match = hrefRegexp.exec(tagString);

    // Tags with an href are keyed by the href value; everything else is
    // keyed by the whole (trimmed) string.
    var seen = match === null ? seenStrings : seenHrefs;
    var key = match === null ? tagString : match[1];

    if (seen[key] === true) return false;
    seen[key] = true;
    return true;
});

console.log(arr);

/* Should output
[ '<tag href="cheese.html">',
  '<tag href="cheddar.html"></tag>',
  '</tag>',
  '<tag href="burger.html">' ]
  */

根据部分内容删除重复的数组元素？

2 个答案: