剥离 HTML 标签:"不使用正则表达式"的方式

时间:2014-05-09 21:28:23

标签: javascript html html-sanitizing

我最近对那些使用正则表达式来清理 HTML 字符串的解决方案有点不放心。它们的可靠性在很大程度上取决于所给的正则表达式有多"防弹"。所以,我写出了下面这个代码片段,并希望从社区获得一些反馈。谢谢。

//
// #notags
/**
 * String#notags — strip markup from a string without regular expressions.
 *
 * The string is parsed as HTML into a scratch <div>, the character data of
 * every text-like descendant is concatenated, and the result is parsed again.
 * This repeats until the div contains nothing but plain text nodes.
 *
 * The returned value is the div's final `innerHTML`, i.e. the remaining text
 * in HTML-escaped form (for example `&` is returned as `&amp;`).
 *
 * NOTE: this extends a native prototype; it is kept only for backward
 * compatibility with existing callers.
 *
 * @this {String} the string to strip
 * @returns {string} the HTML-escaped text that is left once all tags are gone
 */
String.prototype.notags = (function (doc) {

  var hasown = Function.prototype.call.bind(Object.prototype.hasOwnProperty);

  // SECURITY: the input is untrusted markup. Parsing it with innerHTML on an
  // element owned by the live document would still fire inline handlers
  // (e.g. <img src=x onerror=...>) and fetch resources even though the element
  // is never attached. A document created via createHTMLDocument has no
  // browsing context, so parsing inside it is inert.
  var inertdoc = doc.implementation.createHTMLDocument('');

  // #textlike-nodes
  // nodeType values whose character data is collected by getxt
  var textlike = {

    3  : "TEXT_NODE",
    4  : "CDATA_SECTION_NODE",
    5  : "ENTITY_REFERENCE_NODE",
    6  : "ENTITY_NODE",
    7  : "PROCESSING_INSTRUCTION_NODE",
    8  : "COMMENT_NODE",
    12 : "NOTATION_NODE"

  };

  ////////////////
  ////// #helpers

  // #istextnode
  // true only for plain text nodes (nodeType 3)
  function istextnode (node) {
    return node.nodeType === 3;
  }

  // #descendants
  // Collect every descendant of `startnode` in document order.
  // `_nodels` is the accumulator used by the recursive calls; external
  // callers omit it.
  function descendants (startnode, _nodels) {

    var list = _nodels || [];
    var node = startnode.firstChild;

    while (node) {
      list.push(node);
      descendants(node, list);
      node = node.nextSibling;
    }

    return list;
  }

  // #getnodetext
  // forEach callback, called with `this` = {str, txt}; appends the node's
  // character data to this.str when its nodeType is listed in this.txt
  function getnodetext (node) {
    if (hasown(this.txt, node.nodeType)) {
      // trailing '' keeps an empty/null value from being appended as "null"
      this.str += (node.data || node.textContent || node.nodeValue || '');
    }
  }

  // #getxt
  // Concatenate the character data of all text-like descendants of `node`.
  function getxt (node) {

    var acc = {
      str: '',
      txt: textlike
    };

    descendants(node).forEach(getnodetext, acc);

    return acc.str;
  }

  // #_notags
  // main function
  function _notags (tagedstring) {

    var div = inertdoc.createElement('div');

    div.innerHTML = ('' + tagedstring);

    // Replace the div's content with the text of its descendants until
    // every remaining descendant is a plain text node.
    while (!descendants(div).every(istextnode)) {
      div.innerHTML = getxt(div);
    }

    // `div` is a local reference and is reclaimed by the garbage collector
    // once this call returns; no explicit cleanup is needed (and `delete` on
    // a variable is a SyntaxError in strict mode).
    return div.innerHTML;
  }

  // #save
  // String#notags
  return function () {
    return _notags(this);
  };

})(document);

// /eof

0 个答案:

没有答案