如何找到页面上每个以 http:// 开头的单词,并用标签将其包裹起来?
我可以使用像正则表达式这样的东西吗?
答案 0 :(得分:5)
我不同意“jQuery 在这里帮不上忙”的说法。当然,你必须直接操作文本节点(textNode)的一些属性,但在拆分匹配到的节点之后,借助 jQuery 库把 DOM 重新拼装起来会更容易一些。
以下代码带有内联注释,解释了每一步所做的操作。我把它写成了一个 jQuery 插件,以便你可以直接把它拿到别处使用。这样,你可以指定要转换其中 URL 的元素,或者直接使用 $("body") 选择器。
(function($) {
  // Elements whose text must never be linkified: text inside an existing
  // anchor would produce nested <a> elements, and script/style/textarea
  // content is not displayed as regular page text.
  var SKIPPED_ANCESTORS = "a, script, style, textarea";

  // Matches a URL up to optional trailing punctuation followed by whitespace,
  // a double quote, or the end of the text. m[1] is the URL itself.
  var URL_PATTERN = /(https?:\/\/.*?)[.!?;,]?(\s+|"|$)/;

  /**
   * Wraps every http:// or https:// URL found in the text of the selected
   * elements (and of all their descendants) in an <a> element whose href and
   * visible text are the URL.
   *
   * @returns {jQuery} the original jQuery set, so the call can be chained.
   */
  $.fn.anchorTextUrls = function() {
    // Split a text node around each URL it contains and replace the URL part
    // with an anchor element.
    var testAndTag = function(el) {
      var current = el;
      var m = current.nodeValue.match(URL_PATTERN);
      while (m) {
        // Split off the URL, then split off whatever follows the URL.
        // m.index is where the URL starts because the capture group opens
        // the pattern.
        var urlNode = current.splitText(m.index);
        var tail = urlNode.splitText(m[1].length);
        // Use .text(), not .html(): the URL is plain text and may contain
        // characters such as "&" or "<" that must not be parsed as markup.
        var anchor = $("<a></a>").attr("href", m[1]).text(m[1])[0];
        urlNode.parentNode.replaceChild(anchor, urlNode);
        // Keep looking for more URLs in the remaining text. Every pass
        // consumes at least "http://", so the loop always terminates.
        current = tail;
        m = current.nodeValue.match(URL_PATTERN);
      }
    };

    // For each element selected by jQuery
    return this.each(function() {
      // Select the element plus all of its descendants, then keep only the
      // text nodes that are not inside an element we must leave alone.
      // Skipping text inside anchors also makes repeated or nested
      // selections safe, because already-linked URLs are not wrapped again.
      var textNodes = $(this).add("*", this).contents().filter(function() {
        return this.nodeType === 3 &&
          $(this.parentNode).closest(SKIPPED_ANCESTORS).length === 0;
      });
      // Take action on each text node
      $.each(textNodes, function(i, el) {
        testAndTag(el);
      });
    });
  };
}(jQuery));
$("body").anchorTextUrls(); //Sample call
请记住,按照我填充 textNodes 数组的写法,该方法会找到所有后代文本节点,而不仅仅是直接子文本节点。如果你希望它只替换所选元素自身直接包含的文本中的 URL,请删除 .add("*", this) 调用,该调用的作用是把所选元素的所有后代也加入进来。
答案 1 :(得分:3)
这是 jQuery 不能直接帮到你的少数几件事之一。你基本上必须遍历 DOM 树并检查文本节点(nodeType === 3);如果找到一个包含待包装目标文本的文本节点(“http://.....”,无论你要应用什么规则),就把该文本节点拆分(使用 splitText)为三个部分(目标字符串之前的部分、目标字符串本身、目标字符串之后的部分),然后用 a 元素把第二部分包起来。
听起来有点复杂,但实际上并没有那么糟。它只需要一个递归下降的遍历函数(用于遍历 DOM)、一个用来查找待替换内容的正则表达式匹配,然后再调用几次 splitText、createElement、insertBefore 和 appendChild。
这是一个搜索固定字符串的示例;只需添加“http://”的正则表达式匹配:
walk(document.body, "foo");

/**
 * Recursively visits `node` and its descendants, wrapping every occurrence
 * of `targetString` found in text nodes.
 *
 * @param {Node} node - the DOM node to start from.
 * @param {string} targetString - the literal text to look for.
 */
function walk(node, targetString) {
  var children, i;
  switch (node.nodeType) {
    case 1: // Element
      // Iterate over a snapshot of the children: handleText() inserts
      // wrapper elements and removes emptied text nodes, so following the
      // live nextSibling chain would either descend into a freshly created
      // wrapper (wrapping the match a second time) or stop early when the
      // current node has been removed from the tree.
      children = Array.prototype.slice.call(node.childNodes);
      for (i = 0; i < children.length; i++) {
        walk(children[i], targetString);
      }
      break;
    case 3: // Text node
      handleText(node, targetString);
      break;
  }
}

/**
 * Wraps every occurrence of `targetString` inside the text node `node` in a
 * <span class="wrapper"> element, splitting the text node as needed.
 *
 * @param {Text} node - the text node to process; must be attached to a parent.
 * @param {string} targetString - the literal text to wrap.
 */
function handleText(node, targetString) {
  var current = node;
  var start, targetNode, followingNode, wrapper;

  // An empty target matches everywhere and would never make progress.
  if (targetString.length === 0) {
    return;
  }

  // Does the text contain our target string?
  // (This would be a regex test in your http://... case)
  start = current.nodeValue.indexOf(targetString);
  while (start >= 0) {
    // Split at the beginning of the match
    targetNode = current.splitText(start);

    // Split at the end of the match
    followingNode = targetNode.splitText(targetString.length);

    // Wrap the target in an element; in this case, we'll
    // use a `span` with a class, but you'd use an `a`.
    // First we create the wrapper and insert it in front
    // of the target text.
    wrapper = document.createElement('span');
    wrapper.className = "wrapper";
    targetNode.parentNode.insertBefore(wrapper, targetNode);

    // Now we move the target text inside it
    wrapper.appendChild(targetNode);

    // Clean up any empty nodes (in case the target text
    // was at the beginning or end of a text node)
    if (current.nodeValue.length === 0) {
      current.parentNode.removeChild(current);
    }
    if (followingNode.nodeValue.length === 0) {
      followingNode.parentNode.removeChild(followingNode);
      return; // nothing left after the match
    }

    // Continue with the next occurrence in the remaining text, if any
    current = followingNode;
    start = current.nodeValue.indexOf(targetString);
  }
}
更新:如果同一文本节点中存在多个匹配项,则上述操作无法处理(doh!)。哎呀,我做了一个正则表达式匹配 - 你将必须调整正则表达式,并且可能会对每个匹配进行一些后处理,因为这里的内容太简单了。但这是一个开始:
// The regexp should have a capture group that
// will be the href. In our case below, we just
// make it the whole thing, but that's up to you.
// THIS REGEXP IS ALMOST CERTAINLY TOO SIMPLISTIC
// AND WILL NEED ADJUSTING (for instance: what if
// the link appears at the end of a sentence and
// it shouldn't include the ending punctuation?).
// \S+ stops the URL at any whitespace (spaces, tabs, line breaks).
walk(document.body, /(http:\/\/\S+)/i);

/**
 * Recursively visits `node` and its descendants, turning every match of
 * `targetRe` found in text nodes into a link.
 *
 * @param {Node} node - the DOM node to start from.
 * @param {RegExp} targetRe - pattern to look for; its first capture group
 *   (or the whole match if there is none) becomes the link's href.
 */
function walk(node, targetRe) {
  var children, i;
  switch (node.nodeType) {
    case 1: // Element
      // Leave existing links alone (nested <a> elements are invalid HTML)
      // and do not touch text that is not regular page content.
      if (/^(?:a|script|style|textarea)$/i.test(node.nodeName)) {
        break;
      }
      // Iterate over a snapshot of the children: handleText() inserts
      // anchors and removes emptied text nodes, so following the live
      // nextSibling chain would either descend into a freshly created
      // anchor (wrapping the match again) or stop early when the current
      // node has been removed from the tree.
      children = Array.prototype.slice.call(node.childNodes);
      for (i = 0; i < children.length; i++) {
        walk(children[i], targetRe);
      }
      break;
    case 3: // Text node
      handleText(node, targetRe);
      break;
  }
}

/**
 * Wraps every match of `targetRe` inside the text node `node` in an <a>
 * element, splitting the text node as needed.
 *
 * @param {Text} node - the text node to process; must be attached to a parent.
 * @param {RegExp} targetRe - pattern to look for (see walk()).
 */
function handleText(node, targetRe) {
  var current = node;
  var match, targetNode, followingNode, wrapper;

  // Always search from the start of the text, even if the caller passed a
  // regexp with the `g` or `y` flag (exec() is stateful for those).
  targetRe.lastIndex = 0;
  match = targetRe.exec(current.nodeValue);

  // A zero-length match would never make progress, so stop on it.
  while (match && match[0].length > 0) {
    // Split at the beginning of the match
    targetNode = current.splitText(match.index);

    // Split at the end of the match.
    // match[0] is the full text that was matched.
    followingNode = targetNode.splitText(match[0].length);

    // Wrap the target in an `a` element.
    // First we create the wrapper and insert it in front
    // of the target text. We use the first capture group
    // as the `href`, falling back to the whole match when
    // the regexp has no capture group.
    wrapper = document.createElement('a');
    wrapper.href = match[1] !== undefined ? match[1] : match[0];
    targetNode.parentNode.insertBefore(wrapper, targetNode);

    // Now we move the target text inside it
    wrapper.appendChild(targetNode);

    // Clean up any empty nodes (in case the target text
    // was at the beginning or end of a text node)
    if (current.nodeValue.length === 0) {
      current.parentNode.removeChild(current);
    }
    if (followingNode.nodeValue.length === 0) {
      followingNode.parentNode.removeChild(followingNode);
      return; // nothing left after the match
    }

    // Continue with the next match in the remaining text, if any
    current = followingNode;
    targetRe.lastIndex = 0;
    match = targetRe.exec(current.nodeValue);
  }
}
答案 2 :(得分:-1)
我其实不太确定,但你可以试试下面的写法:
// Select every existing anchor whose href attribute starts with "http://".
// The attribute filter is written directly after the tag name; wrapping it
// in parentheses is not valid selector syntax.
$('a[href^="http://"]').each(function() {
  // perform your task; `this` is the matched anchor element
});