从电子邮件正文中删除html标记

时间:2015-07-07 17:09:38

标签: google-apps-script google-sheets gmail

我有一张包含从Gmail邮件中提取的数据的工作表。我必须使用getBody()而不是getPlainBody(),因为电子邮件中有一个表格。从提取的电子邮件正文中删除html标记的最简单方法是什么?

以下是我目前使用的代码,每60秒运行一次:

/**
 * Copies the messages of up to 10 threads labelled 'NEW ORHP JOBS' into the
 * 'ORHP EMAIL' sheet and moves each processed thread to the
 * 'TEST - PROCESSED' label.
 *
 * Each thread becomes one sheet row holding date, subject and HTML body for
 * every message of the thread, in order. Rows shorter than the widest row are
 * padded with empty strings because Range.setValues() needs a rectangular
 * array.
 *
 * Does nothing to the sheet when there is no data to write.
 */
function getORHPEmails() {
  var oldLabel = GmailApp.getUserLabelByName('NEW ORHP JOBS');
  var newLabel = GmailApp.getUserLabelByName('TEST - PROCESSED');
  var threads = oldLabel.getThreads(0, 10);
  var destArray = [];
  var width = 0;

  // Indexed loops: for...in on an array walks keys, not a guaranteed 0..n-1 range.
  for (var n = 0; n < threads.length; n++) {
    var msg = threads[n].getMessages();
    var destArrayRow = [];
    for (var m = 0; m < msg.length; m++) {
      destArrayRow.push(msg[m].getDate());
      destArrayRow.push(msg[m].getSubject());
      destArrayRow.push(msg[m].getBody());
    }
    width = Math.max(width, destArrayRow.length);
    destArray.push(destArrayRow);
    threads[n].removeLabel(oldLabel).addLabel(newLabel);
  }
  Logger.log(destArray);

  // Nothing to write: avoid indexing destArray[0] and requesting an empty range.
  if (destArray.length === 0 || width === 0) {
    return;
  }

  // Threads may hold different numbers of messages; make every row equally wide.
  for (var r = 0; r < destArray.length; r++) {
    while (destArray[r].length < width) {
      destArray[r].push('');
    }
  }

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sh = ss.getSheetByName('ORHP EMAIL');
  // Ask the target sheet (not the spreadsheet) for its last row, so the
  // append position does not depend on which sheet happens to be active.
  if (sh.getLastRow() === 0) {
    sh.getRange(1, 1).setValue('getMessagesWithLabel()RESULTS');
  }
  sh.getRange(sh.getLastRow() + 1, 1, destArray.length, width).setValues(destArray);
}

1 个答案:

答案 0 :(得分:1)

我想出来了。以下是我修改后使用的代码:

/**
 * Converts an HTML string to plain text.
 *
 * The string is parsed with Xml.parse(html, true) (the `true` flag is
 * presumably the lenient-parsing switch — confirm against the Xml service
 * docs), and the document's element is handed to getTextFromNode.
 *
 * @param {string} html HTML markup to convert.
 * @return {string} Whatever getTextFromNode returns for the parsed element.
 */
function getTextFromHtml(html) {
  var parsedDocument = Xml.parse(html, true);
  var rootElement = parsedDocument.getElement();
  return getTextFromNode(rootElement);
}

// Prefix state for list items: 0 means "inside an unordered list" (items get
// ' - '); a positive value is the number the next ordered-list item receives.
// Starts at 0 so an <li> found outside any list is rendered as a bullet.
var _itemNum = 0;

/**
 * Recursively flattens a parsed XML/HTML node into plain text.
 *
 * The node kind is taken from x.toString():
 *  - 'XmlText'    -> the node's toXmlString() value;
 *  - 'XmlElement' -> the concatenated text of its child nodes, with a newline
 *                    appended after br, p, ul and ol elements and a bullet or
 *                    number prefix in front of li elements;
 *  - anything else -> ''.
 *
 * @param {Object} x Node exposing toString(), and either toXmlString() or
 *     getName().getLocalName() plus getNodes().
 * @return {string} Plain-text rendering of the node.
 */
function getTextFromNode(x) {
  switch (x.toString()) {
    case 'XmlText':
      return x.toXmlString();
    case 'XmlElement':
      var name = x.getName().getLocalName();
      var pre = '';
      var post = '';
      // Remember the enclosing list's counter so a nested list cannot clobber it.
      var outerItemNum = _itemNum;
      var isList = false;
      switch (name) {
        case 'br':
        case 'p':
          post = '\n';
          break;
        case 'ul':
          post = '\n';
          _itemNum = 0;
          isList = true;
          break;
        case 'ol':
          post = '\n';
          _itemNum = 1;
          isList = true;
          break;
        case 'li':
          pre = '\n' + (_itemNum === 0 ? ' - ' : (' ' + _itemNum++ + '. '));
          break;
        default:
          break;
      }
      var text = pre + x.getNodes().map(getTextFromNode).join('') + post;
      if (isList) {
        // Leaving the list: resume the counter of whatever list encloses it.
        _itemNum = outerItemNum;
      }
      return text;
    default:
      return '';
  }
}

/**
 * Copies the messages of up to 10 threads labelled 'NEW ORHP JOBS' into the
 * 'ORHP EMAIL' sheet as plain text and moves each processed thread to the
 * 'TEST - PROCESSED' label.
 *
 * Each thread becomes one sheet row holding date, subject and the body
 * (passed through getTextFromHtml) for every message of the thread, in order.
 * Rows shorter than the widest row are padded with empty strings because
 * Range.setValues() needs a rectangular array.
 *
 * Does nothing to the sheet when there is no data to write.
 */
function getORHPEmails() {
  var oldLabel = GmailApp.getUserLabelByName('NEW ORHP JOBS');
  var newLabel = GmailApp.getUserLabelByName('TEST - PROCESSED');
  var threads = oldLabel.getThreads(0, 10);
  var destArray = [];
  var width = 0;

  // Indexed loops: for...in on an array walks keys, not a guaranteed 0..n-1 range.
  for (var n = 0; n < threads.length; n++) {
    var msg = threads[n].getMessages();
    var destArrayRow = [];
    for (var m = 0; m < msg.length; m++) {
      destArrayRow.push(msg[m].getDate());
      destArrayRow.push(msg[m].getSubject());
      destArrayRow.push(getTextFromHtml(msg[m].getBody()));
    }
    width = Math.max(width, destArrayRow.length);
    destArray.push(destArrayRow);
    threads[n].removeLabel(oldLabel).addLabel(newLabel);
  }
  Logger.log(destArray);

  // Nothing to write: avoid indexing destArray[0] and requesting an empty range.
  if (destArray.length === 0 || width === 0) {
    return;
  }

  // Threads may hold different numbers of messages; make every row equally wide.
  for (var r = 0; r < destArray.length; r++) {
    while (destArray[r].length < width) {
      destArray[r].push('');
    }
  }

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sh = ss.getSheetByName('ORHP EMAIL');
  // Ask the target sheet (not the spreadsheet) for its last row, so the
  // append position does not depend on which sheet happens to be active.
  if (sh.getLastRow() === 0) {
    sh.getRange(1, 1).setValue('getMessagesWithLabel() RESULTS');
  }
  sh.getRange(sh.getLastRow() + 1, 1, destArray.length, width).setValues(destArray);
}

感谢 Mogsdad 在《Remove html formatting when getting Body of a gmail message in javascript》中的回答。