如何在 JavaScript 中对带有 HTML 标记的文本截取子字符串?
例如:
// Sample markup: plain text containing a link that wraps a nested <strong> element.
var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.'
// Desired behavior: keep the first 20 text characters (tags are not counted)
// and close every tag that is still open at the cut point.
html_substr(str, 20)
// expected result: Lorem ipsum <a href="#">dolor <strong>si</strong></a>
// With 30 text characters the cut falls after the link, so no tag needs closing.
html_substr(str, 30)
// expected result: Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co
答案 0 :(得分:8)
尽管用正则表达式解析 HTML 是个坏主意(parsing html with regex is a bad idea),这里还是给出一个这样做的解决方案 :)
编辑:澄清一下:这并不是一个严谨的解决方案,它只是一个练习,对输入字符串做了非常宽松的假设,因此应当持保留态度。请阅读上面的链接,了解为什么永远不应该用正则表达式解析 HTML。
/**
 * Truncate an HTML string to at most `n` characters of text while keeping
 * the markup balanced.
 *
 * Only characters found between tags count toward `n`; the tags themselves
 * are copied through unchanged. Any element still open at the cut point is
 * closed, innermost first. Tags are recognized with a regular expression,
 * so this relies on lenient assumptions about the input: no `>` inside
 * attribute values or comments, and properly nested closing tags. Character
 * references such as `&amp;` are counted as their raw source characters.
 *
 * @param {string} s - HTML source to truncate.
 * @param {number} n - Maximum number of text characters to keep. Zero or a
 *   negative value yields an empty string.
 * @returns {string} The truncated HTML with open elements closed.
 */
function htmlSubstring(s, n) {
  // Void elements never take a closing tag, so they must not be tracked as
  // "open" (otherwise the output would gain invalid closers like </br>).
  const voidTags = /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i;
  const tagPattern = /<([^>\s]*)[^>]*>/g;
  const openTags = [];
  let remaining = n;
  let lastIndex = 0;
  let result = '';
  let match;

  // For each tag, while there is still text budget left.
  while (remaining > 0 && (match = tagPattern.exec(s))) {
    // Text between the previous tag and this one, clipped to the budget.
    const text = s.substring(lastIndex, match.index).substr(0, remaining);
    result += text;
    remaining -= text.length;
    lastIndex = tagPattern.lastIndex;

    // The tag is only emitted if the text before it did not use up the budget.
    if (remaining > 0) {
      const tag = match[0];
      const name = match[1];
      result += tag;

      if (name.charAt(0) === '/') {
        // Closing tag: drop the innermost open element (assumes valid nesting).
        openTags.pop();
      } else {
        const first = name.charAt(0);
        const isDeclaration = first === '!' || first === '?'; // <!DOCTYPE>, <!-- -->, <?xml ?>
        const isSelfClosing = tag.charAt(tag.length - 2) === '/'; // <br/>, <br />
        if (!isDeclaration && !isSelfClosing && !voidTags.test(name)) {
          openTags.push(name);
        }
      }
    }
  }

  // Remainder of the text after the last processed tag, if budget is left.
  result += s.substr(lastIndex, remaining);

  // Close whatever is still open, innermost first.
  while (openTags.length) {
    result += '</' + openTags.pop() + '>';
  }
  return result;
}
示例: http://jsfiddle.net/danmana/5mNNU/
注意:patrick dw's solution对于糟糕的HTML可能更安全,但我不确定它处理空格的程度如何。
答案 1 :(得分:5)
它是单个标签的解决方案
/**
 * Take `length` characters of `str` starting at `start`, extending the cut
 * past `length` when it would otherwise fall inside a tag.
 *
 * `length` counts every character, including the characters of the tags
 * themselves. If the cut point lies between a `<` and its matching `>`,
 * copying continues until that `>` is reached. Elements are not closed:
 * only a single tag is kept intact.
 *
 * @param {string} str - Source string that may contain tags.
 * @param {number} start - Index of the first character to copy.
 * @param {number} length - Minimum number of characters to copy.
 * @returns {string} The extracted substring.
 */
function subStrWithoutBreakingTags(str, start, length) {
  let openBrackets = 0;
  let result = '';
  let written = 0;

  // Stop at the end of the string as well: without this bound an unclosed
  // '<' (or a missing length) would keep the loop running forever, because
  // charAt() past the end just returns ''.
  while (
    start + written < str.length &&
    (written < length || openBrackets !== 0)
  ) {
    const letter = str.charAt(start + written);
    if (letter === '<') {
      openBrackets++;
    } else if (letter === '>' && openBrackets > 0) {
      // Ignore a stray '>' in text so the counter can never go negative.
      openBrackets--;
    }
    result += letter;
    written++;
  }
  return result;
}
答案 2 :(得分:4)
用法:
// Sample markup: plain text containing a link that wraps a nested <strong> element.
var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';
// Truncate to 20 and to 30 text characters respectively.
var res1 = html_substr( str, 20 );
var res2 = html_substr( str, 30 );
// Expected output of each call is shown in the trailing comment.
alert( res1 ); // Lorem ipsum <a href="#">dolor <strong>si</strong></a>
alert( res2 ); // Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co
示例: http://jsfiddle.net/2ULbK/4/
功能:
/**
 * Truncate an HTML string to `count` characters of text by parsing it into
 * a detached <div> and trimming its text nodes.
 *
 * Text nodes are visited in document order. Each one consumes its length
 * from the budget; the node that crosses the limit is cut, and every later
 * text node is emptied. Elements themselves are never removed, so elements
 * after the cut point remain in the output as empty tags.
 *
 * NOTE(review): the markup is assigned to `innerHTML`. If `str` can come
 * from an untrusted source, sanitize it first or parse it in an inert
 * context; confirm against the callers.
 *
 * @param {string} str - HTML source to truncate.
 * @param {number} count - Maximum number of text characters to keep.
 * @returns {string} The serialized, truncated HTML.
 */
function html_substr( str, count ) {
  const div = document.createElement('div');
  div.innerHTML = str;

  // Remaining text budget; kept in a local so the parameter is not mutated.
  let remaining = count;

  // Trim a single text node against the remaining budget.
  function track( textNode ) {
    if ( remaining > 0 ) {
      const len = textNode.data.length;
      remaining -= len;
      if ( remaining <= 0 ) {
        // `remaining` is now zero or negative: keep only the part that fits.
        textNode.data = textNode.substringData( 0, len + remaining );
      }
    } else {
      textNode.data = '';
    }
  }

  // Depth-first walk calling `fn` on every text node (nodeType 3) and
  // recursing into elements (nodeType 1). The loop starts from a possibly
  // null firstChild, so an element without children is simply skipped
  // instead of throwing.
  function walk( el, fn ) {
    for ( let node = el.firstChild; node; node = node.nextSibling ) {
      if ( node.nodeType === 3 ) {
        fn( node );
      } else if ( node.nodeType === 1 ) {
        walk( node, fn );
      }
    }
  }

  walk( div, track );
  return div.innerHTML;
}
答案 3 :(得分:0)
// Sample markup to strip down to plain text.
const str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';
// Remove everything that looks like a tag (a '<', then any characters up to
// the next '>'), leaving only the text content.
const plainText = str.replace(/<[^>]+>/g, '');
使用上面给出的正则表达式提取纯文本,然后使用 JavaScript 字符串的 `.substr()` 方法获得所需的结果。
答案 4 :(得分:-1)
Javascript有一个子字符串方法。如果字符串包含html,则没有区别。
答案 5 :(得分:-1)
可以使用类似下面的代码:
str.replace(/<[^>]*>?/gi, '').substr(0, 20);
我在 http://fiddle.jshell.net/xpW9j/1/ 创建了一个示例。