在javascript中截断包含HTML标记的字符串

时间:2018-03-15 16:03:22

标签: javascript html function utf-8

给出了一个示例字符串。

s = "<p class="paragraph">Turnip greens yarrow ricebean rutabaga endive cauliflower sea  lettuce kohlrabi amaranth water <a href="https://www.google.pl/search?q=spinach" class="link">spinach</a> avocado daikon Süßkartoffel napa cabbage <strong>asparagus winter purslane kale. Celery potato scallion desert</strong> raisin horseradish spinach carrot soko. Lotus root water spinach fennel kombu maize <span style="font-size: 19px;color: blue;">bamboo shoot green bean swiss chard seakale pumpkin onion chickpea gram corn pea.</span> Brussels sprout coriander water chestnut gourd swiss chard wakame kohlrabi beetroot carrot watercress. Corn amaranth salsify bunya nuts nori azuki bean chickweed potato bell pepper artichoke.</p>"

字符串是UTF-8编码的。

我需要编写一个函数来截断该字符串,并在其末尾添加省略号。截断后的字符串(包括省略号)不得超过 n 个字符,同时必须满足以下条件:

  

不得把单词从中间截断;

     

不要破坏html元素;

     

必须以适当的顺序关闭所有打开的标签。

1 个答案:

答案 0 :(得分:0)

我相信这是一个在大多数情况下应该有效的解决方案:

// Sample markup passed to trunc() in the demo call below: a <p> wrapping
// <a>, <strong> and <span> elements, with attributes and non-ASCII text.
var s = "<p class='paragraph'>Turnip greens yarrow ricebean rutabaga endive cauliflower sea  lettuce kohlrabi amaranth water <a href='https://www.google.pl/search?q=spinach' class='link'>spinach</a> avocado daikon Süßkartoffel napa cabbage <strong>asparagus winter purslane kale. Celery potato scallion desert</strong> raisin horseradish spinach carrot soko. Lotus root water spinach fennel kombu maize <span style='font-size: 19px;color: blue;'>bamboo shoot green bean swiss chard seakale pumpkin onion chickpea gram corn pea.</span> Brussels sprout coriander water chestnut gourd swiss chard wakame kohlrabi beetroot carrot watercress. Corn amaranth salsify bunya nuts nori azuki bean chickweed potato bell pepper artichoke.</p>";

/**
 * Truncate an HTML string at a word boundary, append an ellipsis and close
 * every element that is still open at the cut point.
 *
 * The text is only ever cut at a space or immediately before a tag, so
 * neither a word nor a tag is split. The kept prefix plus the ellipsis is
 * always shorter than `n` UTF-16 code units; the closing tags appended after
 * the ellipsis are not counted against `n`.
 *
 * Limitations: this is a lightweight scanner, not an HTML parser. A ">"
 * inside an attribute value or a comment is taken as the end of the tag.
 *
 * @param {string} str - HTML markup to truncate.
 * @param {number} n - Length budget, in UTF-16 code units.
 * @returns {string} `str` unchanged when it already fits within `n`;
 *   otherwise the truncated markup followed by "..." and the closing tags of
 *   the elements open at the cut, innermost first. If no cut point fits the
 *   budget, only the ellipsis is returned.
 */
function trunc(str, n) {
  const ELLIPSIS = "...";
  // Elements that never have a closing tag and so must not be tracked as open.
  const VOID_TAGS = new Set([
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr",
  ]);
  // Captures the element name of an opening or closing tag's inner text.
  const TAG_NAME = /^\/?\s*([^\s\/]+)/;

  // Nothing to cut: return the markup untouched, without an ellipsis.
  if (str.length <= n) return str;

  const openTags = []; // names of the elements open at the scan position
  let cut = 0; // index at which the string will be cut
  let openAtCut = []; // snapshot of openTags taken when `cut` was chosen

  // Record `p` as the cut point if the prefix plus the ellipsis fits. The
  // stack is snapshotted because elements may be closed or opened between
  // the chosen cut and the end of the scan.
  const markCut = (p) => {
    if (p + ELLIPSIS.length < n) {
      cut = p;
      openAtCut = openTags.slice();
    }
  };

  for (let p = 0; p < n; p++) {
    const c = str.charAt(p);
    if (c === " ") {
      markCut(p);
      continue;
    }
    if (c !== "<") continue;

    // Cutting right before a tag keeps the preceding text intact.
    markCut(p);
    const end = str.indexOf(">", p + 1);
    // The tag does not fit inside the budget, so no later cut is possible.
    if (end === -1 || end >= n) break;

    const inner = str.slice(p + 1, end);
    const match = TAG_NAME.exec(inner);
    if (match !== null) {
      const name = match[1];
      const key = name.toLowerCase();
      if (inner.charAt(0) === "/") {
        // Closing tag: drop the matching opener and anything left unclosed
        // inside it. A stray closing tag with no opener is ignored.
        for (let i = openTags.length - 1; i >= 0; i--) {
          if (openTags[i].toLowerCase() === key) {
            openTags.length = i;
            break;
          }
        }
      } else if (
        !inner.endsWith("/") && // self-closing syntax, e.g. <br/>
        !VOID_TAGS.has(key) &&
        !/^[!?]/.test(name) // comments, doctype, processing instructions
      ) {
        openTags.push(name);
      }
    }
    p = end; // resume scanning after the tag
  }

  let small = str.substring(0, cut) + ELLIPSIS;
  for (let i = openAtCut.length - 1; i >= 0; i--) {
    small += `</${openAtCut[i]}>`;
  }
  return small;
}

// Demo: print the sample markup truncated with a budget of 300.
const truncated = trunc(s, 300);
console.log(truncated);