高级JavaScript RegExp替换HTML标记内

时间:2012-12-31 18:33:05

标签: javascript regex

我的javascript代码效果非常好:

// Naive approach: run the pattern over the serialized markup of the whole
// body. The input is one plain string, so text inside tags and attribute
// values is matched exactly like visible text content.
var rgx = /MyName/g;
var curInnerHTML = document.body.innerHTML.replace(
    rgx,
    "<span><span class='myName'>MyNameReplace</span></span>"
);

问题是,即使匹配到的内容位于 HTML 属性之类的地方,这个正则表达式也会把它替换掉。如何修改正则表达式,使其只在 HTML 的文本内容中进行匹配?例如,在此字符串中

    <div class="someclass" title="MyName">
MyName
</div>

目前结果如下(请注意title属性的更改):

        <div class="someclass" title="<span><span class='myName'>MyNameReplace</span></span>">
<span><span class='myName'>
    MyNameReplace</span></span>
    </div>

但我需要的结果是这样的(保持 title 属性不变):

    <div class="someclass" title="MyName">
<span><span class='myName'>MyNameReplace</span></span>
</div>

1 个答案:

答案 0 :(得分:3)

你最好的选择(而且它比听起来容易得多)是不要试图用正则表达式来解析 HTML,而是利用 DOM 已经把页面解析好这一事实,递归地处理其中的文本节点。

下面是一个即兴写出的示例:Live Example | Source

// Scratch element shared by `doReplacement`: assigning the replacement
// markup to this div's `innerHTML` makes the browser parse it into DOM
// nodes, which are then moved into the document.
var div = document.createElement('div');

/**
 * Recursive-descent replacement over the text nodes of `node` and all of
 * its descendants. Element tags and attribute values are never modified;
 * only text nodes are rewritten.
 *
 * The replacement is treated as HTML markup: it is parsed through the
 * file-level scratch `div` and the resulting nodes take the place of the
 * original text node. Text nodes for which the replacement changes nothing
 * are left in place untouched.
 *
 * @param {Node} node - Root of the subtree to process.
 * @param {RegExp} rex - Pattern to look for. Note that it is applied to the
 *     text *after* `&` and `<` have been escaped, so those characters are
 *     seen as `&amp;` and `&lt;`.
 * @param {string} text - Replacement markup, passed as the second argument
 *     of `String.prototype.replace`.
 */
function doReplacement(node, rex, text) {
    var child, sibling, escaped, replaced;

    // What kind of node did we get?
    switch (node.nodeType) {
        case 1: // Element
            // Probably best to leave `script` elements alone.
            // You'll probably find you want to add to this list
            // (`object`, `applet`, `style`, ...)
            if (node.nodeName.toUpperCase() !== "SCRIPT") {
                // Walk the children from the *last* one back to the first.
                // Replacement nodes are inserted in front of the text node
                // being processed, so they are never visited again.
                for (child = node.lastChild; child; child = sibling) {
                    // Processing may remove `child` from the DOM, so grab
                    // the node that precedes it before recursing.
                    sibling = child.previousSibling;

                    doReplacement(child, rex, text);
                }
            }
            break;
        case 3: // Text
            // The text may contain ampersands or less-than symbols; escape
            // them so they survive being parsed as markup below.
            escaped = node.nodeValue
                          .replace(/&/g, "&amp;")
                          .replace(/</g, "&lt;");
            replaced = escaped.replace(rex, text);

            // Nothing changed: keep the existing text node instead of
            // re-parsing it and swapping in an identical copy.
            if (replaced === escaped) {
                break;
            }

            // Let the browser turn the markup into real DOM nodes...
            div.innerHTML = replaced;
            // ...move them in front of this text node...
            insertChildrenBefore(div, node);
            // ...and remove the text node they replace.
            node.parentNode.removeChild(node);
            break;
    }
}

/**
 * Moves every child of `container`, in order, into the parent of `refNode`,
 * placing each one directly in front of `refNode`.
 *
 * @param {Node} container - Node whose children are moved out.
 * @param {Node} refNode - Node the children are inserted before.
 */
function insertChildrenBefore(container, refNode) {
    var target = refNode.parentNode;
    var current = container.firstChild;
    var upcoming;

    while (current) {
        // Inserting detaches `current` from `container`, so look up its
        // successor before the move.
        upcoming = current.nextSibling;
        target.insertBefore(current, refNode);
        current = upcoming;
    }
}

您可以这样调用:

// Process every text node under <body>, replacing each "MyName" match with
// the given markup. Only text nodes are rewritten, so attribute values such
// as `title="MyName"` stay as they are.
doReplacement(document.body,
              /MyName/g,
              "<span><span class='myName'>MyNameReplace</span></span>");