Question

假设我有以下文本：

This is a sample url : http://example.com.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" />
<img src="http://example.com/sample2.png" class="sample-image" />
Another url http://example2.com

以下是我用于解析上述文本的正则表达式代码：

// Matches http, https and ftp URLs. Group 1 is the whole URL, group 2 the scheme.
// Flags: g = every occurrence, i = case-insensitive (the character classes only list A-Z);
// m has no effect here because the pattern contains no ^ or $ anchors.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
    // Matches simple e-mail addresses: word characters, "@", one label made of
    // letters/underscores, a dot and a 2-6 letter suffix. Group 1 is the whole address.
    const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

    // NOTE(review): the bare `return` implies an enclosing function that is not
    // visible in this fragment.
    // The returned function works on the raw string and has no notion of HTML
    // context, so a URL inside an attribute value (e.g. src="...") is wrapped too.
    return function(text) {
        // Wrap every URL in an anchor that points at the URL itself.
        if(text.match(urls)) {
            text = text.replace(urls, "<a href=\"$1\">$1</a>")
        }
        // Wrap every e-mail address in a mailto: anchor. This pass runs on the
        // output of the URL pass above, not on the original text.
        if(text.match(emails)) {
            text = text.replace(emails, "<a href=\"mailto:$1\">$1</a>")
        }

        return text
    }

上面的代码把我的文本处理成了这样：

This is a sample url : <a href="http://example.com">http://example.com</a>.
These are some images:
<img src="<a href=" class="sample-image">"http://example.com/sample1.png">"
<img src="<a href=" class="sample-image">"http://example.com/sample2.png">"
Another url <a href="http://example2.com">http://example2.com</a>

我希望得到以下结果：

This is a sample url : <a href="http://example.com">http://example.com</a>.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" /> <!-- Do not change -->
<img src="http://example.com/sample2.png" class="sample-image" /> <!-- Do not change -->
Another url <a href="http://example2.com">http://example2.com</a>

如何达到上述效果？

Answer 1

最好避免使用正则表达式来解析HTML。

RegEx match open tags except XHTML self-contained tags

Using regular expressions to parse HTML: why not?

<hr/>

相反，可以用这段内容生成一个临时 DOM 元素，再取出其中所有的文本节点来更新内容，也就是只对文本节点的内容应用带正则表达式的 replace 方法。

// Sample input: plain-text URLs mixed with <img> tags whose src attributes
// must be left untouched.
const html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com';

// Matches http/https/ftp URLs; group 1 is the whole URL, group 2 the scheme.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
// Matches simple e-mail addresses; group 1 is the whole address.
const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

/**
 * Wraps every e-mail address in `text` in a mailto: anchor.
 *
 * @param {string} text - Text that contains no generated anchor markup yet.
 * @returns {string} The text with e-mail addresses linked.
 */
function linkEmails(text) {
  return text.replace(emails, "<a href=\"mailto:$1\">$1</a>");
}

/**
 * Turns the URLs and e-mail addresses in a piece of text into anchors.
 *
 * E-mail addresses are only linked in the text *between* URL matches. Running
 * the e-mail pattern over the already generated URL anchors would re-link
 * anything e-mail-shaped inside them (e.g. `http://user@example.com/x`) and
 * inject a second anchor into the href attribute.
 *
 * @param {string} text - Plain text to process.
 * @returns {string} The text with URLs and e-mail addresses wrapped in anchors.
 */
function update(text) {
  let result = '';
  let cursor = 0;
  let match;

  // `urls` is a shared /g regex, so exec() is stateful: always start from 0.
  urls.lastIndex = 0;
  while ((match = urls.exec(text)) !== null) {
    const url = match[0];
    result += linkEmails(text.slice(cursor, match.index));
    result += `<a href="${url}">${url}</a>`;
    cursor = match.index + url.length;
  }

  return result + linkEmails(text.slice(cursor));
}

// Let the browser parse the markup inside a detached container element.
const temp = document.createElement('div');
temp.innerHTML = html;

// Walk the container's direct children: text nodes (nodeType 3) are run
// through update(), every other node is emitted as its own markup.
// Older browsers without Array.from can use `[].slice.call()` instead.
const pieces = Array.from(temp.childNodes, (node) =>
  node.nodeType === 3 ? update(node.textContent) : node.outerHTML
);

// Stitch the pieces back together into one string.
console.log(pieces.join(''));

更新：如果您需要保留转义的HTML，则需要添加一个额外的方法来生成文本节点的相应转义HTML。

// Sample input: plain-text URLs, <img> tags that must stay untouched, and an
// escaped anchor (&lt;a ...&gt;) that must stay escaped in the output.
const html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com  hi &lt;a href="#"&gt;Sam&lt;/a&gt;';

// Matches http/https/ftp URLs; group 1 is the whole URL, group 2 the scheme.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
// Matches simple e-mail addresses; group 1 is the whole address.
const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

/**
 * Wraps every e-mail address in `text` in a mailto: anchor.
 *
 * @param {string} text - Text that contains no generated anchor markup yet.
 * @returns {string} The text with e-mail addresses linked.
 */
function linkEmails(text) {
  return text.replace(emails, "<a href=\"mailto:$1\">$1</a>");
}

/**
 * Turns the URLs and e-mail addresses in a piece of text into anchors.
 *
 * E-mail addresses are only linked in the text *between* URL matches. Running
 * the e-mail pattern over the already generated URL anchors would re-link
 * anything e-mail-shaped inside them (e.g. `http://user@example.com/x`) and
 * inject a second anchor into the href attribute.
 *
 * @param {string} text - Text to process; everything that is not a URL or an
 *   e-mail address is passed through unchanged.
 * @returns {string} The text with URLs and e-mail addresses wrapped in anchors.
 */
function update(text) {
  let result = '';
  let cursor = 0;
  let match;

  // `urls` is a shared /g regex, so exec() is stateful: always start from 0.
  urls.lastIndex = 0;
  while ((match = urls.exec(text)) !== null) {
    const url = match[0];
    result += linkEmails(text.slice(cursor, match.index));
    result += `<a href="${url}">${url}</a>`;
    cursor = match.index + url.length;
  }

  return result + linkEmails(text.slice(cursor));
}

/**
 * Returns the HTML-escaped markup of a text node, i.e. its content as it has
 * to be written in HTML source.
 *
 * The node is serialized through a scratch element. A copy of the node is
 * appended because appendChild() moves a node that already has a parent,
 * which would silently remove it from the tree the caller is iterating.
 *
 * @param {Node} node - The text node to serialize; it is left in place.
 * @returns {string} The escaped HTML for the node's content.
 */
function getEncodedText(node) {
  // Scratch container used only for serialization.
  const container = document.createElement('div');
  container.appendChild(node.cloneNode(true));
  // innerHTML serializes the text node with its special characters escaped.
  return container.innerHTML;
}
  // create a DOM element
const temp = document.createElement('div');
// Let the browser parse the markup inside the detached container.
temp.innerHTML = html;

// Walk the container's direct children: text nodes (nodeType 3) are turned
// back into escaped HTML and run through update(), every other node is
// emitted as its own markup.
// Older browsers without Array.from can use `[].slice.call()` instead.
const pieces = Array.from(temp.childNodes, (node) =>
  node.nodeType === 3 ? update(getEncodedText(node)) : node.outerHTML
);

// Stitch the pieces back together into one string.
console.log(pieces.join(''));

Answer 2

怎么样：

/**
 * Wraps bare http/https URLs in anchors while leaving URLs that directly
 * follow a double quote (i.e. double-quoted attribute values) untouched.
 *
 * - The character in front of the URL is captured and written back, so the
 *   whitespace before a link is preserved.
 * - `^` lets a URL at the very start of the text match as well.
 * - Trailing sentence punctuation (. , ; : ! ?) is kept outside the link.
 *
 * This is a heuristic, not an HTML parser: single-quoted or unquoted
 * attribute values are not recognized as markup.
 *
 * @param {string} text - Text that may contain URLs and HTML tags.
 * @returns {string} The text with bare URLs wrapped in anchors.
 */
function linkify(text) {
  return text.replace(
    /(^|[^"])(https?:\/\/[^"\s]*[^"\s.,;:!?])/g,
    '$1<a href="$2">$2</a>'
  );
}

let str = 'This is a sample url : http://example.com.\nThese are some images:\n<img src="http://example.com/sample1.png" class="sample-image" />\n<img src="http://example.com/sample2.png" class="sample-image" />\nAnother url http://example2.com';

str = linkify(str);
console.log(str);

<strong>输出：</strong>

This is a sample url :<a href="http://example.com.">http://example.com.</a>
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" />
<img src="http://example.com/sample2.png" class="sample-image" />
Another url<a href="http://example2.com">http://example2.com</a>

只有当 URL 不位于 HTML 标签的属性值中（即前面不是双引号）时，才会被转换为锚点。

2 个答案: