我想从一段长文本中检索最短的匹配,而要匹配的字符串会在整段文本中重复出现。但是,正则表达式不会在已经匹配到的文本内部继续查找匹配项。
这是我面临的问题的简化版本:
"ababc".match(/a.+c/g)
实际输出:["ababc"]
期望输出:["ababc", "abc"]
因此,我想知道是否有一种更简单的方法来检索子串 "abc",而不必手动编写递归代码在已匹配的文本中继续搜索。
答案 0 :(得分:1)
正如我的评论中提到的,你不能单独使用正则表达式做你想做的事。
你只给出了一个简化的例子,所以我不确定这个方案能帮你走多远,不过下面是我针对你的需求写的一个实现。我猜你实际使用的起止字符并不是 "a" 和 "c",因此你需要做相应的修改(例如把它们作为参数传给函数)。
/**
 * Finds the shortest substring of `str` that starts with 'a', ends with 'c'
 * and has at least one character in between (pattern `a.+?c`).
 *
 * Every 'a' in the input is tried as a starting point, so matches that lie
 * inside a longer match are found as well. All candidates are logged to the
 * console, sorted shortest first, before the shortest one is returned.
 *
 * @param {string} [str] - Text to search; a falsy value is treated as ''.
 * @returns {string|undefined} The shortest match, or undefined if there is none.
 */
function getShortestMatch(str) {
  const text = str || '';
  const results = [];

  // Sticky regex: exec() only attempts a match exactly at lastIndex. This
  // replaces the '^.{index}' prefix, which could not skip over line breaks
  // (so any 'a' after a newline was missed) and re-scanned the whole prefix
  // for every candidate. The match is lazy, so it stops at the first 'c'.
  const pattern = /a.+?c/y;

  // iterate along the string one character at a time
  for (let index = 0; index < text.length; index++) {
    // only an 'a' can begin a match
    if (text[index] !== 'a') {
      continue;
    }
    pattern.lastIndex = index;
    const match = pattern.exec(text);
    if (match) {
      results.push(match[0]);
    }
  }

  // sort the candidates ascending by length (shortest first)
  results.sort((a, b) => a.length - b.length);

  // log all results matched to the console for sake of example
  console.log(results);

  // return the first (shortest) element
  return results[0];
}
示例
// Example call: every 'a' in the input is tried as the start of a match.
getShortestMatch('ababcabbc');
// Output showing all results found (from the console.log inside the function),
// listed shortest first
["abc", "abbc", "ababc"]
// Return value: the shortest of the results above
"abc"
注意:此函数不会尝试找出"'a' 和 'c' 之间的所有内容"的全部可能匹配,因为你的问题只关心最短的匹配。如果出于某种原因你需要所有可能的匹配,那么还需要把贪婪的 .+ 正则表达式加进来。
答案 1 :(得分:0)
从每个连续字符开始遍历子串(使用 slice),用锚定到字符串开头(^)的正则表达式进行匹配,并使用非贪婪匹配(?):
答案 2 :(得分:0)
由于其有效性,简单性和高效性,这是我的答案:
/**
 * Returns the shortest stretch of `source` that contains every word of `seq`
 * in order, matched case-insensitively on word boundaries, with at least one
 * character between consecutive words. Overlapping and nested candidates are
 * considered.
 *
 * The words of `seq` are matched literally: regex metacharacters are escaped,
 * and any run of whitespace in `seq` counts as a single separator.
 * Because each word is wrapped in `\b`, a word that starts or ends with a
 * non-word character (e.g. "c++") may not match as expected.
 *
 * @param {string} seq - Whitespace-separated words to look for.
 * @param {string} source - Text to search.
 * @returns {string|null} The shortest matching snippet, or null if none.
 */
function findShortestSnippet(seq, source) {
  const words = seq
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    // Escape metacharacters so the phrase cannot inject regex syntax.
    .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
  if (words.length === 0) {
    return null;
  }

  const re = new RegExp(`\\b${words.join("\\b.+?\\b")}\\b`, "gi");
  let shortest = null;
  let found;
  while ((found = re.exec(source)) !== null) {
    const match = found[0];
    if (shortest === null || match.length < shortest.length) {
      shortest = match;
    }
    // Resume one character after this match's start (not after its end) so
    // that shorter matches nested inside the current one are still found.
    re.lastIndex = found.index + 1;
  }
  return shortest;
}

let seq = "us warship";
let source = "The traditional US adversary has also positioned a spy ship off the coast of Delaware and carried out flights near a US Navy warship, concerning American officials.";
let snippet = findShortestSnippet(seq, source);
console.log(snippet); // "US Navy warship"