为什么我的正则表达式不适用于JS?

时间:2015-07-03 06:40:10

标签: javascript regex

我有一段正则表达式,在regex101中运行正常,但当我把它放进JS函数时却不起作用——也就是说,它会产生错误的结果。

我需要 data-sh-content 和 data-sh-attr 的值。由于某些原因,我无法使用DOM操作,必须使用正则表达式。

// Sample markup: a "content block start" comment followed by a .content-block
// element whose data-sh-attr / data-sh-content attributes hold URI-encoded text.
// NOTE(review): this literal ends at '<div' and contains no
// '<!-- content block end --></p>' marker, which the pattern below requires;
// presumably the sample was truncated when posted. Confirm against the fiddle.
var content = '<p><!-- content block start --></p><div class="content-block mceNonEditable" style="color: #999999;" data-sh-attr="%20color%3D%22%23999999%22" data-sh-content="%5Bgrid%5D%3Cp%3Elkjlk%3C%2Fp%3E%5B%2Fgrid%5D%5Bgrid%5D%3Cp%3Elkjlkj%3C%2Fp%3E%5B%2Fgrid%5D"><div class="toolbar"><button class="edit-content-block">Edit</button><button class="remove-content-block">Delete</button></div><div class="content"><div class="grid col-md-6" data-sh-attr="" data-sh-content="%3Cp%3Elkjlk%3C%2Fp%3E"><p>lkjlk</p></div><div class="grid col-md-6" data-sh-attr="" data-sh-content="%3Cp%3Elkjlkj%3C%2Fp%3E"><p>lkjlkj</p></div><div';

// Log the input before any replacement is attempted.
console.log('before restoring shortcode: %s', content);

// Lazily matches from the "content block start" comment through the
// "content block end" comment, capturing the data-sh-attr value (group 1)
// and the data-sh-content value (group 2) that follow "content-block".
var content_block_pattern = /<p><!-- content block start -->[\S\s]+?content-block[\S\s]+?data-sh-attr="([\s\S]+?)"[\s\S]+?data-sh-content="([\s\S]+?)"[\S\s]+?<!-- content block end --><\/p>/g;
// NOTE: String.prototype.replace invokes the replacer as (match, p1, p2, ...),
// so with this two-parameter signature `attributes` receives the whole match
// and `content` receives capture group 1; capture group 2 is never read.
content = content.replace(content_block_pattern, function(attributes, content){
    attributes = decodeURIComponent(attributes);
    content = decodeURIComponent(content);
    console.log("attributes %s", attributes);
    console.log("Conents: %s", content);
    // Rebuild the matched block as a [block ...]...[/block] shortcode.
    return '[block '+attributes+']'+content+'[/block]';
});

// Log the string after the replacement.
console.log('after restoring shortcodes: %s', content);

它给出了错误的结果(请在控制台中查看),尽管我使用的模式与在regex101中运行正常的模式完全相同:https://regex101.com/r/hF1wE3/1

这是一个可供您试用的jsfiddle:http://jsfiddle.net/yLm3xwrw/

2 个答案:

答案 0 :(得分:0)

不要使用正则表达式来处理HTML。尤其是当你手头就有地球上最先进的HTML解析器(即浏览器)时,更不应该这样做。

// Parse the HTML string into a DOM subtree.
// The sample markup carries URI-encoded data-sh-attr / data-sh-content
// attributes on the .content-block element and on its nested .grid elements.
var content = '<p><!-- content block start --></p><div class="content-block mceNonEditable" style="color: #999999;" data-sh-attr="%20color%3D%22%23999999%22" data-sh-content="%5Bgrid%5D%3Cp%3Elkjlk%3C%2Fp%3E%5B%2Fgrid%5D%5Bgrid%5D%3Cp%3Elkjlkj%3C%2Fp%3E%5B%2Fgrid%5D"><div class="toolbar"><button class="edit-content-block">Edit</button><button class="remove-content-block">Delete</button></div><div class="content"><div class="grid col-md-6" data-sh-attr="" data-sh-content="%3Cp%3Elkjlk%3C%2Fp%3E"><p>lkjlk</p></div><div class="grid col-md-6" data-sh-attr="" data-sh-content="%3Cp%3Elkjlkj%3C%2Fp%3E"><p>lkjlkj</p></div>';
// A detached <div> serves as the parsing container; it is never attached
// to the document in this snippet.
var container = document.createElement("DIV");
// Assigning innerHTML makes the browser parse the markup into child nodes.
// NOTE(review): innerHTML should only be fed trusted markup.
container.innerHTML = content;

/**
 * Rewrites one attribute of an element by passing its current value
 * through a transform function.
 *
 * The attribute is left untouched when it is absent or its value is falsy
 * (for example an empty string).
 *
 * @param {Element} elem - Node exposing getAttribute / setAttribute.
 * @param {string} attrib - Name of the attribute to process.
 * @param {function(string): string} func - Transform applied to the current value.
 */
function processAttribute(elem, attrib, func) {
    var current = elem.getAttribute(attrib);
    if (!current) {
        return;
    }
    elem.setAttribute(attrib, func(current));
}

// Now processing the attributes is straightforward.
// Select the content block itself plus every descendant that carries either
// data attribute. Every selector in the list needs the leading "." on
// "content-block"; without it the name is parsed as an element (tag) selector
// and the data-sh-content descendants would not be matched by that branch.
var contentSel = ".content-block, .content-block *[data-sh-attr], .content-block *[data-sh-content]",
    contentElems = container.querySelectorAll(contentSel),
    i;

// Decode both URI-encoded attributes on every matched element.
// processAttribute skips attributes that are missing or empty.
for (i = 0; i < contentElems.length; i++) {
    processAttribute(contentElems[i], "data-sh-attr", decodeURIComponent);
    processAttribute(contentElems[i], "data-sh-content", decodeURIComponent);
}

// Print the resulting markup with the decoded attribute values.
console.log(container.innerHTML);

我认为实际上你已经拥有了DOM节点,在这种情况下,解析步骤就没有必要了。

答案 1 :(得分:-1)

replace 回调函数的第一个参数是完整的匹配结果(all),而不是第一个捕获组。修正后的代码如下。

// Restore each matched content block to its shortcode form. The replacer's
// first parameter is the whole match; the two capture groups follow it.
content = content.replace(content_block_pattern, function (wholeMatch, rawAttrs, rawBody) {
    var decodedAttrs = decodeURIComponent(rawAttrs);
    var decodedBody = decodeURIComponent(rawBody);

    console.log("attributes %s", decodedAttrs);
    console.log("Conents: %s", decodedBody);

    // Assemble [block <attrs>]<body>[/block].
    return ['[block ', decodedAttrs, ']', decodedBody, '[/block]'].join('');
});