假设我有两个字符串,有没有办法检查它们是否至少有90%相似?
// Sample inputs for the question: the same phrase, once without spaces and
// with extra leading/trailing characters, once with normal spacing.
var string1 = 'theBoardmeetstoday,tomorrow51',
    string2 = 'Board meets today, tomorrow';
谢谢,
Tegan
答案 0 :(得分:16)
Levenshtein distance的维基百科条目包括示例实现。
答案 1 :(得分:10)
答案 2 :(得分:4)
String.levenshtein(插件MooTools)
检查出来:http://mootools.net/forge/p/string_levenshtein
GitHub:https://github.com/thinkphp/String.levenshtein
此方法计算两个字符串之间的Levenshtein距离。在信息论和计算机科学中,Levenshtein距离是用于衡量两个序列之间差异量(称为编辑距离)的度量。两个字符串之间的Levenshtein距离是将一个字符串转换为另一个字符串所需的最少操作次数,其中允许的操作是单个字符的插入、删除或替换。
Levenshtein距离算法已用于:
答案 3 :(得分:2)
还可以考虑Dice's Coefficient(Dice系数):string-similarity GitHub存储库及其对应npm模块的作者认为它比Levenshtein距离“更好”。
其文档中的用法:
// Load the third-party string-similarity npm module.
var stringSimilarity = require('string-similarity');
// Compare a single pair of strings; presumably yields a similarity score —
// confirm the exact return value against the library's documentation.
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
// Ask the library for the best match to 'healed' among the candidate list;
// the shape of the result is not shown here — check the library's docs.
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
答案 4 :(得分:0)
借鉴其他人的答案,我写了一个简单的 JS 函数 stringsAreSimilar 来做到这一点:
// https://github.com/thinkphp/String.levenshtein/blob/master/Source/String.levenshtein.js
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions, or substitutions needed
 * to turn `stringA` into `stringB`.
 *
 * Characters are compared with `charAt`, i.e. per UTF-16 code unit.
 *
 * @param {string} stringA - First string.
 * @param {string} stringB - Second string.
 * @returns {number} The edit distance; 0 when the strings are identical.
 */
function getStringDifference(stringA, stringB) {
    var n = stringA.length;
    var m = stringB.length;
    var cost = [];
    var i;
    var j;
    var x;
    var y;

    // Turning an empty string into the other one takes exactly one insertion
    // per character, so the distance is the other string's length.
    if (n === 0) {
        return m;
    }
    if (m === 0) {
        return n;
    }

    // cost[i][j] holds the distance between the first i characters of stringA
    // and the first j characters of stringB. Column 0 is seeded here
    // (i deletions) and row 0 below (j insertions).
    for (i = 0; i <= n; i++) {
        cost[i] = [i];
    }
    for (j = 0; j <= m; j++) {
        cost[0][j] = j;
    }

    for (i = 1; i <= n; i++) {
        x = stringA.charAt(i - 1);
        for (j = 1; j <= m; j++) {
            y = stringB.charAt(j - 1);
            if (x === y) {
                // Matching characters cost nothing extra.
                cost[i][j] = cost[i - 1][j - 1];
            } else {
                // Cheapest of substitution, insertion, deletion, plus one edit.
                cost[i][j] = 1 + Math.min(
                    cost[i - 1][j - 1],
                    cost[i][j - 1],
                    cost[i - 1][j]
                );
            }
        }
    }

    return cost[n][m];
}
/**
 * Reports whether two strings are "similar", meaning their edit distance
 * (as returned by `getStringDifference`) is strictly below a threshold.
 *
 * Note: this is an absolute bound on the number of edits, not a percentage
 * of the string length.
 *
 * @param {string} stringA - First string.
 * @param {string} stringB - Second string.
 * @param {number} [maxDifference=10] - Exclusive upper bound on the edit
 *     distance for the strings to count as similar.
 * @returns {boolean} True when the distance is below `maxDifference`.
 */
function stringsAreSimilar(stringA, stringB, maxDifference) {
    // Fall back to the historical threshold when the caller passes none.
    var limit = maxDifference === undefined ? 10 : maxDifference;
    var difference = getStringDifference(stringA, stringB);
    debugConsoleLog("stringA" + stringA);
    debugConsoleLog("stringB" + stringB);
    debugConsoleLog("difference" + difference);
    return difference < limit;
}
// Demo: compare the two sample strings and report the verdict.
var string1 = "theBoardmeetstoday,tomorrow51";
var string2 = "Board meets today, tomorrow";
// The verdict must be computed before branching on it; reading an
// undeclared `similar` would throw a ReferenceError.
var similar = stringsAreSimilar(string1, string2);
if (similar) {
    console.log("they are similar");
} else {
    console.log("they are not similar");
}