Javascript字符串匹配模式帮助

时间:2010-02-12 10:00:23

标签: javascript string pattern-matching

我需要使用Javascript找到几个单词或匹配模式。

这是要求。

我有一个像这样的字符串,

  

Here is a quick guide for the next time you reach for your favorite oil and some other topics

我需要将这个字符串与这样的字符串匹配

favorite oil and some other topics can be based on something blah blah

如何获得匹配文本块的交集?

我已经尝试了相交的Javascript脚本函数,对于某些字符串,它无法正常工作。

如何解决这个问题?这可以使用Regex完成吗?

请建议。

3 个答案:

答案 0 :(得分:8)

您必须找到Longest common substring

如果字符串不长,我建议使用Tim的方法。否则,下面是用动态规划实现的最长公共子串算法的Javascript版本。运行时间为O(mn),其中m和n分别是两个字符串的长度。

示例用法:

// Two sample sentences; the longest run of characters they have in common
// is "favorite oil and some other topics".
const first = "Here is a quick guide for the next time you reach for your favorite oil and some other topics";
const second = "favorite oil and some other topics can be based on something blah blah";

// Print the array that intersection() returns for this pair.
const commonParts = first.intersection(second);
console.log(commonParts);

这是算法实现。它返回最长公共子串的数组。由于扩展了原生String类,所有字符串都可以使用intersection方法。

/**
 * Finds the longest common substring(s) of this string and another string
 * using dynamic programming: grid[i][j] holds the length of the common run
 * that ends at this[i] and anotherString[j].
 *
 * Relies on the createGrid(rows, columns) helper for the zero-filled table.
 *
 * @param {string} anotherString - The string to compare against.
 * @returns {string[]} Every longest common substring, in the order its end
 *     position is met while scanning this string; the same text is listed
 *     once per place it is found. Empty array when nothing is shared.
 */
String.prototype.intersection = function(anotherString) {
    const grid = createGrid(this.length, anotherString.length);
    let longestSoFar = 0;
    let matches = [];

    for (let i = 0; i < this.length; i++) {
        for (let j = 0; j < anotherString.length; j++) {
            if (this.charAt(i) !== anotherString.charAt(j)) {
                continue;
            }
            // A match in the first row/column cannot extend an earlier run.
            grid[i][j] = (i === 0 || j === 0) ? 1 : grid[i - 1][j - 1] + 1;

            if (grid[i][j] > longestSoFar) {
                // Strictly longer run found: earlier candidates are obsolete.
                longestSoFar = grid[i][j];
                matches = [];
            }
            if (grid[i][j] === longestSoFar) {
                // substring() excludes its end index, so the end must be
                // i + 1 for the character at position i to be included.
                matches.push(this.substring(i - longestSoFar + 1, i + 1));
            }
        }
    }
    return matches;
};

还需要这个辅助函数来创建一个二维数组,所有元素都初始化为0。

/**
 * Builds a two-dimensional array in which every cell starts at 0.
 *
 * @param {number} rows - Number of inner arrays to create.
 * @param {number} columns - Number of cells in each inner array.
 * @returns {number[][]} A rows-by-columns grid of zeros; every row is its
 *     own array, so writing to one row never affects another.
 */
function createGrid(rows, columns) {
    return Array.from({ length: rows }, function () {
        return new Array(columns).fill(0);
    });
}

答案 1 :(得分:3)

这不是很有效,而且总体来说有更好的方法可以做到这一点(参见@ Anurag的答案),但它很简单,适用于短字符串:

/**
 * Returns the longest substring that occurs in both strings, found by brute
 * force: every substring of the shorter input is tried, longest first, and
 * the first one contained in the longer input wins. Simple, but only meant
 * for short strings.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {string} The longest common substring, or "" when the strings
 *     share no characters. If several candidates tie for longest, the one
 *     starting latest in the shorter string is returned.
 */
function stringIntersection(str1, str2) {
    // Take candidates from the shorter string: a common substring can never
    // be longer than it, and this keeps the number of candidates small.
    const [shorter, longer] = str1.length > str2.length ? [str2, str1] : [str1, str2];
    const shorterLen = shorter.length;

    // Start with the whole of the shorter string and try ever shorter
    // substrings until one is also present in the longer string.
    for (let len = shorterLen; len > 0; len--) {
        for (let start = shorterLen - len; start >= 0; start--) {
            // slice() takes an end index, not a length, hence start + len.
            const candidate = shorter.slice(start, start + len);
            if (longer.includes(candidate)) {
                return candidate;
            }
        }
    }
    return "";
}

// Sample sentences; each literal is split in two only to keep lines short.
const s1 = "Here is a quick guide for the next time you reach"
    + " for your favorite oil and some other topics";
const s2 = "favorite oil and some other topics can be based on"
    + " something blah blah";

// Show the common part reported by stringIntersection() in a dialog.
const shared = stringIntersection(s1, s2);
alert(shared);

答案 2 :(得分:0)

过滤字符串的简单polyfill

// Install the method only when nothing else has defined it already.
if (!String.prototype.intersection) {
  /**
   * Filters this string down to the characters that also occur somewhere in
   * anotherString, keeping their original order, case and repetitions.
   *
   * Characters are compared per Unicode code point, so surrogate pairs are
   * never split, and case folding is applied to each character on its own,
   * so a character whose lower-case form has a different length cannot
   * shift the comparison of the characters that follow it.
   *
   * @param {string} anotherString - Supplies the set of allowed characters.
   * @param {boolean} [caseInsensitive=false] - When true, characters are
   *     compared by their lower-case form.
   * @returns {string} The characters of this string that passed the filter.
   */
  String.prototype.intersection = function(anotherString, caseInsensitive = false) {
    const normalize = (ch) => (caseInsensitive ? ch.toLowerCase() : ch);
    const allowed = new Set(Array.from(anotherString, normalize));
    return Array.from(String(this))
      .filter((ch) => allowed.has(normalize(ch)))
      .join("");
  };
}

"HelloWorld".intersection("HEWOLRLLODo", true)

“HelloWorld” - 不区分大小写

"HelloWorld".intersection("HEWOLRLLODo")

“HoWo” - 区分大小写