背后的故事
我正在使用x-webkit-speech
创建一个语音控制的应用程序,这个功能非常好用(我指的是该功能,而不是我的应用程序),但有时用户(也就是我)会说得含糊不清。如果所说单词的某个合理部分与某条合理命令的某个合理部分相匹配,那么最好能接受该命令。因此,我在寻找这样一种“圣杯”算法:在一组单词中找出与给定单词交集最长的那个词。有没有头脑清醒的聪明人能把我从绝望的深渊里拉出来?
示例
"rotation" in ["notable","tattoo","onclick","statistically"]
应匹配 tattoo,因为它与 rotation 的交集最长( tat_o )。 statistically 排在第二位(交集为 tati ),因为需要忽略该单词中更长的部分(不过这只是加分条件,没有它也可以接受)。
备注
我尝试了什么?
嗯,这非常令人尴尬......
// Sketch of a brute-force search; it is incomplete (the innermost loop has no body yet).
// NOTE(review): for...in yields property keys, so `word` below is a key of `words`
// rather than an element; if `words` is an array, `word.length` measures the index
// string, not the word itself — confirm intent before building on this.
for(var i=10; i>=4; --i) // candidate substring length, tried from 10 down to 4
for(var word in words) // visit every entry of the word set
for(var j=0; j<word.length-i; ++j) // start offset of a substring of length i
// aaargh... three levels of abstraction is too much for me
答案 0 :(得分:2)
这个算法看起来是可行的。我不清楚它与其他成熟算法相比表现如何(我猜会更差),但也许能让你对实现思路有个大致的了解:
**FIDDLE**
// Minimum overlap between the two strings, and also the minimum number of
// matching letters a candidate needs before its score is reported.
var minInt = 3;
// Candidate words and the word they are compared against.
var arr = ["notable","tattoo","onclick","statistically"];
var word = "rotation";
// res[i] ends up as the best match count for arr[i], or null if that count is below minInt.
var res = [];

// Counts the positions at which both strings hold the same character when
// `left` is read starting at leftStart and `right` starting at rightStart,
// up to the end of whichever remainder is shorter.
function countAlignedMatches(left, leftStart, right, rightStart) {
    var overlap = Math.min(left.length - leftStart, right.length - rightStart);
    var hits = 0;
    for (var pos = 0; pos < overlap; pos++) {
        if (left[leftStart + pos] === right[rightStart + pos]) {
            hits += 1;
        }
    }
    return hits;
}

if (word.length >= minInt) {
    // Largest number of leading characters of `word` that may be cut off
    // while still leaving minInt characters to compare.
    var maxLead = word.length - minInt;
    arr.forEach(function (candidate, idx) {
        var best = 0;
        if (candidate.length >= minInt) {
            // shift < 0: `word` loses its first -shift characters;
            // shift > 0: the candidate loses its first shift characters;
            // shift = 0: both strings are compared from their first character.
            for (var shift = -maxLead; shift <= candidate.length - minInt; shift++) {
                var hits = countAlignedMatches(
                    candidate, Math.max(shift, 0),
                    word, Math.max(-shift, 0)
                );
                if (hits > best) {
                    best = hits;
                }
            }
        }
        res[idx] = best >= minInt ? best : null;
    });
}
console.log(res);
该算法的做法是:把一个字符串相对于另一个字符串逐位“滑动”,并在每个相对位置上统计对应位置相同的字母数。
下面是 rotation 与 notable 比较时,各个位置上参与比较的“子”串:
ion / notable --> one match on index 1
tion / notable --> no match
ation / notable --> no match
tation / notable --> one match on index 2
otation / notable --> no match
rotation / notable --> three matches on index 1,2,3
rotation / otable --> no match
rotation / table --> no match
rotation / able --> no match
rotation / ble --> no match
如您所见,最大匹配数为3,这就是它将返回的内容。
答案 1 :(得分:1)
以下是Javascript中Levenshtein距离计算器的实现。
返回包含匹配命令和距离的对象。
// Commands the application understands.
var commandArr = [
    "cat",
    "dog",
    "fish",
    "copy",
    "delete"
];
// Sample input to be matched against the commands above.
var testCommand = "bopy";
/**
 * Finds the entry of `arr` with the smallest distance to `str`, as measured
 * by calcLevDist. When several entries share the smallest distance, the
 * first of them is reported.
 *
 * @param {string} str input to match
 * @param {Array} arr candidate strings
 * @return {{match: *, distance: *}} the chosen entry and its distance; both
 *         are undefined when no entry has the minimum distance (e.g. `arr`
 *         is empty)
 */
function closestMatch(str, arr)
{
    var distances = [];
    var index = 0;
    while (index < arr.length)
    {
        var candidate = arr[index];
        distances.push(calcLevDist(str, candidate));
        console.log("Testing "+ str + " against " + candidate);
        index += 1;
    }
    // Smallest distance first, then the first position holding it.
    var smallest = Math.min.apply(null, distances);
    var winner = distances.indexOf(smallest);
    return {
        match : arr[winner],
        distance : distances[winner]
    };
}
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `str1` into `str2`.
 *
 * The previous implementation only compared the strings character by
 * character from the end and added the length difference, which
 * over-estimates the distance whenever an insertion or deletion shifts the
 * remaining characters (e.g. "kitten" / "sitting" gave 6 instead of 3).
 *
 * @param {string} str1 first string
 * @param {string} str2 second string
 * @return {number} edit distance (0 when the strings are equal)
 */
function calcLevDist (str1, str2)
{
    var len1 = str1.length;
    var len2 = str2.length;
    // Turning a string into the empty string (or back) costs one edit per character.
    if (len1 === 0)
    {
        return len2;
    }
    if (len2 === 0)
    {
        return len1;
    }
    // Only two rows of the dynamic-programming table are needed at a time:
    // prev[j] is the distance between the first i-1 characters of str1 and
    // the first j characters of str2; curr[j] is the same for i characters.
    var prev = [];
    var curr = [];
    var i, j, swap, substitution;
    for (j = 0; j <= len2; j++)
    {
        prev[j] = j;
    }
    for (i = 1; i <= len1; i++)
    {
        curr[0] = i;
        for (j = 1; j <= len2; j++)
        {
            substitution = prev[j - 1] + (str1.charAt(i - 1) === str2.charAt(j - 1) ? 0 : 1);
            curr[j] = Math.min(
                prev[j] + 1,      // delete a character from str1
                curr[j - 1] + 1,  // insert a character into str1
                substitution      // substitute, or keep when the characters are equal
            );
        }
        // Reuse the two row arrays instead of allocating a new one per row.
        swap = prev;
        prev = curr;
        curr = swap;
    }
    return prev[len2];
}
// Look up the closest command for the sample input and unpack the result.
var matchObj = closestMatch(testCommand, commandArr);
var match = matchObj.match;
var dist = matchObj.distance;
// Write a summary of the result into the #result element.
var resultMessage = "Closest match to " + testCommand + " = " + match + " with a Lev Distance of " + dist + ".";
$("#result").html(resultMessage);
你可以在这个 fiddle 里自行试验:here。
答案 2 :(得分:0)
感谢 basilikum、JasonNichols、Mike 和 Andrew 的评论,它们确实帮助我完成了这个算法。我给出了自己的暴力解法,复杂度为 O(n^3),
供遇到同样问题的人参考。
欢迎任何人在 the fiddle 上动手改进。
算法
/**
 * Fuzzy-matches a word against an array of strings.
 *
 * Each candidate is slid across the needle in both directions. For every
 * relative offset, the characters that coincide position by position are
 * counted; the highest count over all offsets is the candidate's score.
 * The candidate with the highest score wins, and on a tie the earliest one
 * in `haystack` is kept.
 *
 * @param {string} needle word to search for
 * @param {number} accurancy minimum number of matching characters a
 *        candidate needs (parameter spelling kept for compatibility)
 * @param {Array} haystack array of strings to examine; entries that are not
 *        strings are skipped
 * @return {string|undefined} best matching word, or undefined if no
 *         candidate reaches `accurancy` matching characters
 */
function fuzzyMatch(needle,accurancy,haystack) {
    // Counts the positions i at which a[i] equals b[i + shift].
    function strcmpshift(a,b,shift) {
        var match = 0;
        // Positions beyond the end of either string can never match.
        var len = Math.min(a.length, b.length - shift);
        for (var i = 0; i < len; ++i) {
            if (a[i] === b[i + shift]) ++match;
        }
        return match;
    }
    // Best strcmpshift score over every shift of b from 0 to b.length - 1.
    function strcmp(a,b) {
        var max = 0;
        for (var shift = 0; shift < b.length; ++shift) {
            var now = strcmpshift(a, b, shift);
            if (now > max) max = now;
        }
        return max;
    }
    var word;
    // A candidate must score strictly above this value to be accepted.
    var best = accurancy - 1;
    // Index loop instead of for...in, so enumerable properties added to
    // Array.prototype are never mistaken for haystack entries.
    for (var i = 0; i < haystack.length; ++i) {
        var item = haystack[i];
        if (typeof item !== "string") continue;
        // strcmp only shifts its second argument, so run it both ways round
        // to cover offsets in both directions.
        var step = Math.max(strcmp(item, needle), strcmp(needle, item));
        if (step <= best) continue;
        best = step;
        word = item;
    }
    return word;
}
**示例**
// The word to look up and the commands the application understands.
var word = "rotation";
var commands = [
    "notable",
    "tattoo",
    "onclick",
    "statistically"
];
// Pick the closest command, requiring at least 3 matching characters.
var command = fuzzyMatch(word, 3, commands);
alert(command); // tattoo