将HTML中的链接转换为锚点

时间:2015-11-18 14:50:34

标签: javascript jquery html angularjs

我有一个HTML文本,我需要用锚点替换链接(如www.so.com)。

输入是:

<p>Hi I have a nice website on www.so.com and would...</p>
<p>Click <a href='http://www.so.com'>this link</a></p>

输出应该返回:

<p>Hi I have a nice website on <a href='www.so.com'>www.so.com</a> and would...</p>
<p>Click <a href='http://www.so.com'>this link</a></p>

棘手的部分是HTML文本中已有的锚点。

下面是我目前的解决方案,但它还有问题:过滤器会把纯文本中的链接替换为锚点,但也会把已有锚点中的链接再替换一次......

// AngularJS filter "autolink": wraps URLs and e-mail addresses found in a
// string in <a> tags and returns the resulting markup as a string.
// NOTE(review): $sanitize is injected but never used in the code shown here.
// NOTE(review): the closing of this factory function / .filter() call is not
// part of this excerpt.
.filter('autolink', ['$sanitize', function ($sanitize) {
// LINKY_URL_REGEXP finds the next link candidate. Capture groups:
//   1 - the whole prefix ("ftp://", "http(s)://", "www." or "[mailto:]user@")
//   2 - the scheme name (ftp / http / https) when one is present
//   3 - "www." when the candidate starts with it
//   4 - "mailto:" when it is written explicitly before an address
// The prefix is followed by any run of non-whitespace characters. The final
// character class keeps the match from ending in punctuation, brackets or
// quotes, since those are often found at the end of a sentence.
// MAILTO_REGEXP strips a leading "mailto:" from the visible link text.
var LINKY_URL_REGEXP =
    /((ftp|https?):\/\/|(www\.)|(mailto:)?[A-Za-z0-9._%+-]+@)\S*[^\s.;,(){}<>"\u201d\u2019]/i,
    MAILTO_REGEXP = /^mailto:/i;

/**
 * @param {string} text - Input to scan; falsy input is returned unchanged.
 * @param {string} [target] - Value for the target attribute of each link,
 *     used unless `attributes` already has a 'target' key.
 * @param {Object|Function} [attributes] - Extra attributes for each link, or
 *     a function that receives the URL and returns such an object.
 * @returns {string} The input with every match wrapped in an <a> element.
 *
 * NOTE(review): the scan treats the input as plain text. Nothing here skips
 * existing markup, so a URL that is already inside <a href="..."> matches the
 * regexp as well and gets wrapped again.
 */
return function (text, target, attributes) {
    if (!text) return text;
    var match;
    // Remaining, not yet processed part of the input.
    var raw = text;
    // Output fragments, joined once at the end.
    var html = [];
    var url;
    var i;
    // Repeatedly take the first match in the remaining text, emit the text
    // before it verbatim, emit the link, then continue after the match.
    while ((match = raw.match(LINKY_URL_REGEXP))) {
        url = match[0];
        // No explicit scheme and no explicit "mailto:": a "www." match gets
        // "http://" prepended, anything else is an address and gets "mailto:".
        if (!match[2] && !match[4]) {
            url = (match[3] ? 'http://' : 'mailto:') + url;
        }
        i = match.index;
        addText(raw.substr(0, i));
        // The visible text is the matched string without a leading "mailto:".
        addLink(url, match[0].replace(MAILTO_REGEXP, ''));
        raw = raw.substring(i + match[0].length);
    }
    // Whatever follows the last match is emitted unchanged.
    addText(raw);
    return html.join('');

    // Appends a non-empty text fragment to the output as-is (no escaping).
    function addText(text) {
        if (!text) {
            return;
        }
        html.push(text);
    }

    // Appends an <a> element for `url` with `text` as its content.
    function addLink(url, text) {
        var key;
        html.push('<a ');
        // A function is called with the URL and its result replaces
        // `attributes`, so later links see that result, not the function.
        if (angular.isFunction(attributes)) {
            attributes = attributes(url);
        }
        if (angular.isObject(attributes)) {
            // Attribute values are written without escaping.
            for (key in attributes) {
                html.push(key + '="' + attributes[key] + '" ');
            }
        } else {
            attributes = {};
        }
        // An explicit 'target' key in attributes wins over the target argument.
        if (angular.isDefined(target) && !('target' in attributes)) {
            html.push('target="',
                target,
                '" ');
        }
        // Only double quotes are escaped in the href value.
        html.push('href="',
            url.replace(/"/g, '&quot;'),
            '">');
        addText(text);
        html.push('</a>');
    }
};

2 个答案:

答案 0 :(得分:0)

您可以借用showdown.js正则表达式来解析链接。它将解析明文并忽略HTML。

\b(((https?|ftp|dict):\/\/|www\.)[^'">\s]+\.[^'">\s]+)(?=\s|$)(?!["<>])

regex101.com test

请注意,它在以下(格式不规范的HTML)情况下会出错:

  • <a href="www.google.com ">bla</a>
  • <a href="www.google.com\n">bla</a>(\n 是换行符)

答案 1 :(得分:0)

这是一个有点棘手的问题,因为纯文本链接和锚点标记同时存在。我尝试解决了这个问题,请看下面的代码。您也可以在Codepen上查看相同的内容。

输入字符串(var plainText)

<p>Hola! This is my sincere attempt on www.stackoverflow.com to solve this problem.</p><p>Click <a href=\'https://stackoverflow.com/questions/33783154/convert-links-in-html-to-anchors\'>here</a> to view my answer.</p><p>Thanks for your time!</p><p><a href="https://stackoverflow.com/">Stackoverflow rocks!</a></p>

输出字符串(var updatedTextAnchorify)

<p>Hola! This is my sincere attempt on <a href="http://www.stackoverflow.com">www.stackoverflow.com</a> to solve this problem.</p><p>Click <a href="https://stackoverflow.com/questions/33783154/convert-links-in-html-to-anchors">here</a> to view my answer.</p><p>Thanks for your time!</p><p><a href="https://stackoverflow.com/">Stackoverflow rocks!</a></p>

代码段

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
  <title>Anchorify</title>
</head>
<body>
  <script>
  (function() {
    // Install the helper only when it is not already present. The check has to
    // look at String.prototype, because that is where the method is defined.
    if (!String.prototype.anchorify) {
      /**
       * Returns a copy of this string in which bare URLs, scheme-less "www."
       * hosts and e-mail addresses are wrapped in <a> tags.
       *
       * The three replacements run one after another over the whole string,
       * so each later pass also sees the markup produced by the earlier ones.
       * The method does not recognise existing <a> elements; callers must keep
       * those out of the input.
       *
       * @returns {string} The text with links converted to anchors.
       */
      String.prototype.anchorify = function() {
        var
          // http://, https://, ftp://
          urlPattern = /\b(?:https?|ftp):\/\/[a-z0-9-+&@#\/%?=~_|!:,.;]*[a-z0-9-+&@#\/%=~_|]/gim,
          // www., Sans http:// or https://
          // $1 is the preceding character; requiring it not to be "/" skips
          // the host of a "scheme://www..." URL handled by urlPattern. The
          // host run stops at whitespace, angle brackets and quotes so that
          // adjacent markup such as "</p>" is not pulled into the link.
          pseudoUrlPattern = /(^|[^\/])(www\.[^\s<>"']+(\b|$))/gim,
          // Email addresses
          emailAddressPattern = /[\w.]+@[a-zA-Z_-]+?(?:\.[a-zA-Z]{2,6})+/gim;

        return this
          .replace(urlPattern, '<a href="$&">$&</a>')
          .replace(pseudoUrlPattern, '$1<a href="http://$2">$2</a>')
          .replace(emailAddressPattern, '<a href="mailto:$&">$&</a>');
      };
    }

    // Convert the links in plainText to anchors while leaving the anchors that
    // are already there alone. Only the text BETWEEN existing anchors is passed
    // to anchorify(); each existing anchor is re-emitted directly. This keeps
    // every anchor intact, including ones that share an href or whose href
    // anchorify() would not link (relative, "#...") or would mangle ("mailto:").
    var
      // Text to be converted/anchorified
      plainText = '<p>Hola! This is my sincere attempt on www.stackoverflow.com to solve this problem.</p><p>Click <a href=\'https://stackoverflow.com/questions/33783154/convert-links-in-html-to-anchors\'>here</a> to view my answer.</p><p>Thanks for your time!</p><p><a href="https://stackoverflow.com/">Stackoverflow rocks!</a></p>',
      // Simple anchors: <a href='...'>text</a> or <a href="...">text</a>.
      // Group 1 is the opening quote (the closing quote must be the same one),
      // group 2 the href value, group 3 the link text.
      anchorTagPattern = /<a href=(['"])(.*?)\1>([^<]+)<\/a>/gi,
      updatedTextAnchorify = '',
      // Index in plainText just past the last anchor that was handled.
      cursor = 0,
      anchorMatch;

    while ((anchorMatch = anchorTagPattern.exec(plainText)) !== null) {
      updatedTextAnchorify +=
        // Plain text in front of this anchor: links in it become anchors.
        plainText.slice(cursor, anchorMatch.index).anchorify() +
        // The anchor itself, normalised to a double-quoted href. A double
        // quote inside a formerly single-quoted href is escaped.
        '<a href="' + anchorMatch[2].replace(/"/g, '&quot;') + '">' + anchorMatch[3] + '</a>';
      cursor = anchorTagPattern.lastIndex;
    }
    // Text after the last anchor (or the whole input when there is none).
    updatedTextAnchorify += plainText.slice(cursor).anchorify();

    // Write to the Document
    document.write(updatedTextAnchorify);
  })();
  </script>
</body>
</html>

另外,请看一下这个Stackoverflow答案,它清楚地解释了为什么自己编写正则表达式来解析URL是一个糟糕的想法,并且还提供了一些有用的参考资料。

要测试您自己的输入字符串,只需修改 var plainText 的值。

我希望这有帮助!