我想比较两个字符串(修改之前和修改之后),并准确检测它们之间发生变化的位置和内容。
如有任何改变,我想知道:
假设字符串一次只会在一个地方发生变化(例如,绝不会出现 "Bill" -> "Kiln" 这种多处同时变化的情况)。
此外,我需要开始和结束位置来反映变化的类型:
例如:
"0123456789" -> "03456789"
Start: 1, End: 2, Change: "" (deletion)
"03456789" -> "0123456789"
Start: 1, End: 1, Change: "12" (insertion)
"Hello World!" -> "Hello Aliens!"
Start: 6, End: 10, Change: "Aliens" (replacement)
"Hi" -> "Hi"
Start: 0, End: 0, Change: "" (no change)
我能够在某种程度上检测到已更改文本的位置,但它并不适用于所有情况,因为为了准确地执行此操作,我需要知道所做的更改。
// Inputs: the text before (OldText) and after (NewText) the edit.
var OldText = "My edited string!";
var NewText = "My first string!";
// Results. Each one keeps its initial 0 unless a mismatch is found below.
var ChangeStart = 0;
var NewChangeEnd = 0;
var OldChangeEnd = 0;

console.log("Comparing start:");
// Walk forward over NewText until the first index where the texts disagree.
let head = 0;
while (head < NewText.length) {
  console.log(`${head}: ${NewText[head]} -> ${OldText[head]}`);
  if (NewText[head] !== OldText[head]) {
    ChangeStart = head;
    break;
  }
  head += 1;
}

console.log("Comparing end:");
// The backward scan only runs when the lengths differ, and it is bounded by
// the longer text. Equal lengths leave the limit at 0, so nothing is scanned.
let scanLimit = 0;
if (NewText.length > OldText.length) {
  // "Addition"?
  scanLimit = NewText.length;
} else if (NewText.length < OldText.length) {
  // "Deletion"?
  scanLimit = OldText.length;
}

// Compare characters at the same distance from the end of each text and
// record the positions of the first pair that differs.
for (let back = 1; back < scanLimit; back += 1) {
  const newPos = NewText.length - back;
  const oldPos = OldText.length - back;
  // charAt yields "" for out-of-range positions, like the one-character
  // substring it replaces.
  const newChar = NewText.charAt(newPos);
  const oldChar = OldText.charAt(oldPos);
  console.log(`${back}(N: ${newPos} O: ${oldPos}: ${newChar} -> ${oldChar}`);
  if (newChar !== oldChar) {
    NewChangeEnd = newPos;
    OldChangeEnd = oldPos;
    break;
  }
}

console.log(`Change start: ${ChangeStart}`);
console.log(`NChange end : ${NewChangeEnd}`);
console.log(`OChange end : ${OldChangeEnd}`);
console.log(`Change: ${OldText.substring(ChangeStart, OldChangeEnd + 1)}`);
如何判断是否进行了插入,删除或替换?
答案 0 :(得分:4)
我已经完成了你的代码,你匹配字符串的逻辑对我来说很有意义。它正确记录ChangeStart
,NewChangeEnd
和OldChangeEnd
,算法流程正常。您只想知道是否发生了插入,删除或替换。这就是我要怎么做的。
首先,你需要确保在找到第一个不匹配点(即 ChangeStart)之后,
从末尾向前遍历字符串时,索引不应该越过 ChangeStart。
我给你举个例子。请考虑以下字符串:
ChangeStart
这种情况下的问题是当它从后面开始匹配时,流程是这样的:
var NewText = "Hello Worllolds!";
var OldText = "Hello Worlds!";
ChangeStart -> 10 //Makes sense
OldChangeEnd -> 8
NewChangeEnd -> 11
console.log("Change: " + NewText.substring(ChangeStart, NewChangeEnd + 1));
//Ouputs "lo"
假设,我刚刚说的是有道理的,你只需修改你的 Comparing end:
1(N: 12 O: 12: ! -> !)
2(N: 11 O: 11: s -> s)
3(N: 10 O: 10: d -> d) -> You need to stop here!
//Although there is not a mismatch, but we have reached ChangeStart and
//we have already established that characters from 0 -> ChangeStart-1 match
//That is why it outputs "lo" instead of "lol"
循环:
for
这种情况 - &gt; if (NewText.length > OldText.length) {
for (var i = 1; i < NewText.length && ((OldText.length-i)>=ChangeStart); i++) {
...
NewChangeEnd = NewText.length - i -1;
OldChangeEnd = OldText.length - i -1;
if(//Mismatch condition reached){
//break..That code is fine.
}
}
条件 (OldText.length-i)>=ChangeStart 负责处理我提到的异常情况,因此一旦该条件不再满足,
for 循环会自动终止。然而,正如我刚才演示的那样,有可能在遇到不匹配之前就先达到这个条件。因此,您需要将
NewChangeEnd 和 OldChangeEnd
的值更新为比当前已匹配位置小 1。如果出现不匹配,则照常存储相应的值。
我们可以把这两种情况合并到 else 中,而不是使用 else-if:
进入 else 时,我们知道 NewText.length > OldText.length
肯定不成立,即它是替换或删除。同样,NewText.length > OldText.length 成立
也意味着它可能是替换或插入,根据您的示例,这是有道理的。
所以 else 分支可能是这样的:
如果您已了解到目前为止的细微变化,确定具体案例非常简单:
else {
for (var i = 1; i < OldText.length && ((OldText.length-i)>=ChangeStart); i++) {
...
NewChangeEnd = NewText.length - i -1;
OldChangeEnd = OldText.length - i -1;
if(//Mismatch condition reached){
//break..That code is fine.
}
}
1. 如果 ChangeStart > NewChangeEnd:发生了删除。
字符串从 ChangeStart -> OldChangeEnd 被删除。已删除的文字 ->
OldText.substring(ChangeStart, OldChangeEnd + 1);
2. 如果 ChangeStart > OldChangeEnd:发生了插入。
在 ChangeStart 处插入了字符串。插入的文字 ->
NewText.substring(ChangeStart, NewChangeEnd + 1);
3. 如果 NewText != OldText 且上述两个条件都不满足,则是替换。旧字符串中被替换的文字 ->
OldText.substring(ChangeStart, OldChangeEnd + 1);替换后的文字 ->
NewText.substring(ChangeStart, NewChangeEnd + 1);
被替换部分在 OldText 中的开始和结束位置 -> ChangeStart -> OldChangeEnd
我创建了一个jsfiddle,其中包含了我在代码中提到的更改。你可能想看一下。希望它能让你开始朝着正确的方向前进。
答案 1 :(得分:1)
我遇到了类似的问题,并通过以下方式解决了这个问题:
/**
 * Describes the single contiguous edit that turns `oldText` into `newText`.
 *
 * The edit is found by trimming the longest common prefix and then the
 * longest common suffix that does not overlap that prefix.
 *
 * @param {string} oldText - Text before the edit.
 * @param {string} newText - Text after the edit.
 * @returns {{type: string, start: number, removed: number, added: number}}
 *   `type` is 'add', 'remove', 'replace' or 'none'; `start` is the index of
 *   the first changed character (0 when nothing changed); `removed` and
 *   `added` are the number of characters dropped from the old text and
 *   inserted into the new text.
 */
function diff(oldText, newText) {
  const oldLen = oldText.length;
  const newLen = newText.length;

  // Length of the common prefix.
  let prefix = 0;
  while (prefix < oldLen && prefix < newLen && oldText[prefix] === newText[prefix]) {
    prefix += 1;
  }

  // Length of the common suffix, capped so it never reaches back into the
  // prefix of the shorter string.
  const room = (oldLen < newLen ? oldLen : newLen) - prefix;
  let suffix = 0;
  while (suffix < room && oldText[oldLen - 1 - suffix] === newText[newLen - 1 - suffix]) {
    suffix += 1;
  }

  // Whatever lies between prefix and suffix is the edit.
  const removed = oldLen - suffix - prefix;
  const added = newLen - suffix - prefix;

  let type = 'none';
  if (removed > 0 && added > 0) {
    type = 'replace';
  } else if (removed > 0) {
    type = 'remove';
  } else if (added > 0) {
    type = 'add';
  }

  // With no change, the start index is reported as 0, not the prefix length.
  const start = type === 'none' ? 0 : prefix;
  return { type: type, start: start, removed: removed, added: added };
}
注意,这并没有解决我的实际问题。我的问题是我有一个带有段落的编辑器,每个段落都用文本建模,还有一组用开始和结束索引定义的标记,例如:从char 1到char 5的粗体。我使用它来检测字符串的更改,以便我可以相应地移动标记索引。但请考虑字符串:
XX<strong>XXXX</strong>XX
diff函数方法无法区分添加到粗体外或其中的字符。
最后,我采用了一种完全不同的方法 - 我只是解析了编辑器生成的HTML并用它来确定标记的开始和结束索引。
答案 2 :(得分:1)
使用与上述相同的策略(从前到后,从后到前寻找差异),制作了我自己的稍微高性能的版本
/**
 * Finds the single contiguous change that turns `oldText` into `newText`.
 *
 * The common prefix is skipped first, then the common suffix. The suffix scan
 * is capped so it never reaches back into the prefix; without the cap,
 * repeated characters (e.g. "aaa" -> "aa") make the two scans overlap and
 * the change is lost.
 *
 * @param {string} oldText - Text before the edit.
 * @param {string} newText - Text after the edit.
 * @returns {[number, string, string]} `[difStart, added, removed]`:
 *   the index of the first changed character, the text inserted into
 *   `newText` there, and the text removed from `oldText` there. Identical
 *   inputs yield `[0, "", ""]`.
 */
function compareText(oldText, newText)
{
    const oldLength = oldText.length;
    const newLength = newText.length;
    const shortest = Math.min(oldLength, newLength);

    // From left to right: index of the first differing character. It stops at
    // the end of the shorter string, so a pure append or truncation reports
    // that position and is never left undefined.
    let difStart = 0;
    while (difStart < shortest && oldText.charAt(difStart) === newText.charAt(difStart))
    {
        difStart++;
    }

    // From right to left: number of trailing characters both strings share,
    // limited to what is left of the shorter string after the prefix.
    const maxSuffix = shortest - difStart;
    let suffixLength = 0;
    while (suffixLength < maxSuffix &&
        oldText.charAt(oldLength - 1 - suffixLength) === newText.charAt(newLength - 1 - suffixLength))
    {
        suffixLength++;
    }

    // slice() replaces the deprecated substr(); the end index is exclusive.
    const removed = oldText.slice(difStart, oldLength - suffixLength);
    const added = newText.slice(difStart, newLength - suffixLength);

    // No change at all: report position 0, not the string length.
    if (removed === "" && added === "")
    {
        return [0, "", ""];
    }
    return [difStart, added, removed];
}