我有以下函数,它对单个字符串 inputHtml 依次执行多个替换操作。它运行正常,但耗时太长。是否可以把这些替换合并成一次来加速它?
/**
 * Receives HTML code and returns the plain text contained in the HTML code.
 *
 * Processing order:
 *   1. HTML comments are removed.
 *   2. `<br>` elements (also `<br/>`, `<br />`, any letter case) become "\n".
 *   3. Every remaining tag is removed.
 *   4. Blank / whitespace-only lines are dropped.
 *   5. The result is passed through `entities.decode`.
 *
 * NOTE(review): `entities` is not defined in this snippet; it is assumed to be
 * an HTML-entity decoder already in scope at the call site — confirm.
 *
 * @param {string} inputHtml - HTML source to convert.
 * @returns {string} The value returned by `entities.decode` for the stripped text.
 */
function decodeHtml(inputHtml) {
  // No `m` flag on the first three patterns: they contain no ^/$ anchors,
  // so the flag had no effect.
  const commentsRemoved = inputHtml.replace(/<!--[\s\S]*?-->/g, '');

  // Accept self-closing and upper-case variants so they become line breaks
  // instead of being silently stripped by the generic tag pattern below.
  const linebreaksAdded = commentsRemoved.replace(/<br\s*\/?>/gi, '\n');

  // `[^>]*` stops at the first ">" just like the previous lazy `(?:.|\n)*?`,
  // but it also crosses "\r" (and other line terminators that `.` rejects),
  // so tags broken across CRLF lines are removed too. It also avoids the
  // per-character alternation that made the old pattern slow.
  const tagsRemoved = linebreaksAdded.replace(/<[^>]*>/g, '');

  // `m` is required here: `^` must match at the start of every line.
  const linebreaksRemoved = tagsRemoved.replace(/^\s*[\r\n]/gm, '');

  return entities.decode(linebreaksRemoved);
}
答案 0 :(得分:1)
由于您的部分替换涉及换行处理,要把它们合并成一个单次遍历(single pass)的正则表达式,就必须借助回调函数做一些组合处理。
正则表达式解释
( <!-- [\s\S]*? --> ) # (1), return ''
|
(?: # Blank lines, simulate ^ multiline
( \r? \n ) # (2), return $2
| ( # (3 start)
( \r? ) # (4), return $4 + '\n'
<br>
) # (3 end)
)
(?: \s | <br> | <!-- [\s\S]*? --> )*
\r?
(?: \n | <br> )
|
( <br> ) # (5), return '\n'
|
( < [\s\S]*? > ) # (6), return ''
JS代码
// Sample HTML fragment: runs of <br>, whitespace and a comment between words.
const input = 'here<br> <br> <br> <br><!-- <br> --> <br><br><br><br>and here<br>and there ';

// Single-pass replacement. Exactly one alternative of the pattern matches per
// hit, and the callback chooses the replacement from whichever group is set:
//   comment / tag          -> removed
//   newline + blank run    -> the original newline
//   <br> + blank run       -> optional "\r" kept, followed by "\n"
//   lone <br>              -> "\n"
const output = input.replace(
  /(<!--[\s\S]*?-->)|(?:(\r?\n)|((\r?)<br>))(?:\s|<br>|<!--[\s\S]*?-->)*\r?(?:\n|<br>)|(<br>)|(<[\s\S]*?>)/g,
  (match, comment, newline, brRun, brPrefix, loneBr, tag) => {
    if (newline) {
      return newline;
    }
    if (brRun) {
      return `${brPrefix}\n`;
    }
    if (loneBr) {
      return '\n';
    }
    if (comment || tag) {
      return '';
    }
    // Falls through with no value when no group is set, as the original callback did.
    return undefined;
  },
);

console.log(output);

输入
here<br> <br> <br> <br><!-- <br> --> <br><br><br><br>and here<br>and there
输出
here
and here
and there