Question

我需要使用Javascript做三件事：

选择类名（class）为“foo”的所有节点。
查找这些节点中以“*”开头的所有单词。
使用<span class="xyz"> ... </span>围绕这些字词，其中xyz是单词本身。

例如，内容：

<ul>
  <li class="foo">
    *abc def *ghi
  </li>
  <li class="bar">
    abc *def *ghi
  </li>
</ul>

会变成

<ul>
  <li class="foo">
    <span class="abc">*abc</span> def <span class="ghi">*ghi</span>
  </li>
  <li class="bar">
    abc *def *ghi    <!-- Not part of a node with class "foo", so no changes made. -->
  </li>
</ul>

我该怎么做？（P.S.涉及jQuery的解决方案也有效，但除此之外我不想包含任何其他依赖项。）

Answer 1

不需要jQuery：

/**
 * Recursively walks `node` and its descendants and wraps every word of the
 * form `*xyzzy` found in a text node in `<span class="xyzzy">*xyzzy</span>`.
 *
 * Attribute nodes (type 2) and comment nodes (type 8) are skipped.
 *
 * @param {Node} node Root of the subtree to process (e.g. `document`).
 */
function UE_replacer(node) {

   // skip attribute and comment nodes (types 2 and 8, respectively)
   if (node.nodeType == 2 || node.nodeType == 8) return;

   // text nodes (type 3) are where the actual replacement happens
   if (node.nodeType == 3) {
      UE_wrapStarWords(node);
      return;
   }

   // for all other node types, recurse into the children. Walk backwards:
   // wrapping a text node inserts new siblings at and after its position,
   // and a forward walk would visit the freshly created spans (whose text
   // still starts with '*') and wrap them again without end.
   for (var i = node.childNodes.length - 1; i >= 0; --i) {
      UE_replacer(node.childNodes[i]);
   }
}

/**
 * Replaces the `*word` occurrences inside a single text node with real
 * span elements. Assigning markup to `nodeValue` does not work: the text
 * is not parsed as HTML, so the tags would show up literally on the page.
 *
 * @param {Text} textNode Text node to process; left untouched when it has
 *     no parent or contains no `*word`.
 */
function UE_wrapStarWords(textNode) {
   var text = textNode.nodeValue;
   var parent = textNode.parentNode;
   if (!text || !parent) return;

   var doc = textNode.ownerDocument;

   // A '*' that starts a word: it is either at the very beginning of the
   // text or preceded by a non-word character (captured in group 1 so it
   // can be kept outside of the span). Group 2 is the word itself.
   var starWord = /(^|\W)\*(\w+)/g;
   var cursor = 0;
   var match;

   while ((match = starWord.exec(text)) !== null) {
      var start = match.index + match[1].length;

      // plain text between the previous match and this one
      if (start > cursor) {
         parent.insertBefore(
            doc.createTextNode(text.slice(cursor, start)), textNode);
      }

      var span = doc.createElement('span');
      span.className = match[2];
      span.appendChild(doc.createTextNode('*' + match[2]));
      parent.insertBefore(span, textNode);

      cursor = starWord.lastIndex;
   }

   // every match consumes at least two characters, so an unchanged cursor
   // means nothing matched and the node stays as it is
   if (cursor === 0) return;

   // the original node keeps whatever text follows the last match
   if (cursor < text.length) {
      textNode.nodeValue = text.slice(cursor);
   } else {
      parent.removeChild(textNode);
   }
}

// Kick off the traversal at the document root, so that the
// whole page is walked from the top down.
UE_replacer(document);

更新：为了与 strager 的回答形成对比，我去掉了自己那段拙劣的 jQuery 代码，并让正则表达式尽可能保持简单。这种“原生” JavaScript 写法比我预想的要容易得多。

虽然 jQuery 显然很适合操作 DOM 结构，但要弄清楚如何用它操作文本节点（text node）其实并不容易。

Answer 2

不要尝试处理元素的innerHTML / html（）。这将永远不会起作用，因为正则表达式不足以解析HTML。只需遍历Text节点，寻找您想要的内容：

// Walk the children of `element` from last to first. Child elements are
// descended into recursively; child text nodes are handed to
// wrapWordsInText together with `tagName` and `className`. Any other kind
// of node is ignored.
//
function wrapWordsInDescendants(element, tagName, className) {
    var index = element.childNodes.length;
    while (index > 0) {
        index -= 1;
        var current = element.childNodes[index];

        if (current.nodeType == 3) { // Node.TEXT_NODE
            wrapWordsInText(current, tagName, className);
            continue;
        }
        if (current.nodeType == 1) { // Node.ELEMENT_NODE
            wrapWordsInDescendants(current, tagName, className);
        }
    }
}

// Matches a '*word' that is either at the start of the text or preceded by
// a non-word character. That leading character is captured in group 1, so
// it is part of the match and has to be excluded when wrapping.
//
var starword= /(^|\W)\*\w+\b/g;

/**
 * Wraps every '*word' inside a single text node in a newly created element.
 *
 * @param {Text} node Text node to process. It is split in place: the
 *     wrapped words end up in new elements inserted as following siblings.
 * @param {string} tagName Tag name of the wrapper element, e.g. 'span'.
 * @param {string|function(string): string} className Class for the wrapper.
 *     A string is used as-is for every wrapper; a function is called with
 *     the word (without the leading '*') and its result is used instead.
 */
function wrapWordsInText(node, tagName, className) {
    var text= node.data;

    // Collect [start, end) offsets of each *word first; the node is only
    // modified afterwards so the offsets stay valid.
    //
    var ixs= [];
    var match;
    starword.lastIndex= 0; // shared /g regex: never start from a stale position
    while ((match= starword.exec(text))!==null) {
        // Skip the captured leading character so that only '*word' itself
        // is wrapped, not the whitespace/punctuation in front of it.
        ixs.push([match.index+match[1].length, match.index+match[0].length]);
    }

    // Wrap from the last match to the first: splitting the tail off never
    // shifts the offsets of the matches that are still to be processed.
    //
    for (var i= ixs.length; i-->0;) {
        var start= ixs[i][0];
        var end= ixs[i][1];

        // Create the wrapper in the node's own document so the function
        // also works on nodes that live in another frame.
        var element= node.ownerDocument.createElement(tagName);
        element.className= typeof className=='function'?
            className(text.slice(start+1, end)) : className;

        // Only split off a tail when there is one; otherwise an empty text
        // node would be left behind after the wrapper.
        if (end<node.data.length)
            node.splitText(end);
        element.appendChild(node.splitText(start));
        node.parentNode.insertBefore(element, node.nextSibling);
    }
}

// Run the wrapper over every element that carries the class 'foo',
// using <span class="xyz"> as the wrapping element.
//
$('.foo').each(function() {
    var fooElement= this;
    wrapWordsInDescendants(fooElement, 'span', 'xyz');
});


// If you're not using jQuery, you'll need the below bits instead of $...

// Fallback for Array.prototype.indexOf on old IE, which lacks it.
// Mirrors the standard method: strict (===) comparison, an optional
// fromIndex (negative values count back from the end) and holes in
// sparse arrays are skipped. Returns the first matching index, or -1.
//
function indexOfFallback(item, fromIndex) {
    var length= this.length>>>0;
    var start= Number(fromIndex) || 0;
    start= start<0? Math.max(Math.ceil(start)+length, 0) : Math.floor(start);
    for (var i= start; i<length; i++)
        if (i in this && this[i]===item)
            return i;
    return -1;
}

// Only install the fallback when the native method is missing.
//
if (![].indexOf) Array.prototype.indexOf= indexOfFallback;

// Iterating over '*' (all elements) is not fast; if possible, reduce to
// all elements called 'li', or all element inside a certain element etc.
//
// The class attribute is a whitespace-separated list, so the name is
// matched as a whole token delimited by any whitespace (not only a single
// space). On SVG elements `className` is an object with a `baseVal`
// string rather than a string, so that case is unwrapped first.
//
var elements= document.getElementsByTagName('*');
var fooClass= /(^|\s)foo(\s|$)/;
for (var i= elements.length; i-->0;) {
    var classNames= elements[i].className;
    if (classNames && typeof classNames.baseVal=='string')
        classNames= classNames.baseVal;
    if (typeof classNames=='string' && fooClass.test(classNames))
        wrapWordsInDescendants(elements[i], 'span', 'xyz');
}

Answer 3

regexp看起来像这样（sed-ish语法）：

s/\*\(\w+\)\b\(?![^<]*>\)/<span class="\1">*\1</span>/g

因此：

// For every <li class="foo">, rewrite its markup so that each *word that
// is not inside a tag gets wrapped in a span whose class is the word.
$('li.foo').each(function() {
    var item = $(this);
    // '*' + word characters, not followed by a '>' that closes a tag
    var starWord = /\*(\w+)\b(?![^<]*>)/g;
    var wrapped = item.html().replace(starWord, "<span class=\"$1\">*$1</span>");
    item.html(wrapped);
});

其中 \*(\w+)\b 这一段是关键部分。它匹配一个星号，后跟一个或多个单词字符，再后面是某种单词边界（例如行尾或空格）。该单词被捕获到 $1 中，随后既用作输出的文本，也用作 class 名。

之后的部分（(?![^<]*>)）是一个负向先行断言（negative lookahead）。它断言后面不会出现右尖括号 >，除非在它之前先出现一个左尖括号 <。这样可以防止匹配到 HTML 标签内部的字符串。它无法处理格式错误的 HTML，不过本来也不应该出现那样的 HTML。

如何使用Javascript修改节点的内容？

3 个答案: