我有一张工作表,其中包含从Gmail邮件中提取的数据。由于电子邮件中有一个表格,我必须使用getBody()而不是getPlainBody()。从提取的电子邮件正文中删除html标记的最简单方法是什么?
以下是我目前使用的代码,每60秒运行一次:
/**
 * Copies up to 10 threads labelled 'NEW ORHP JOBS' into the 'ORHP EMAIL' sheet
 * and then moves those threads to the 'TEST - PROCESSED' label.
 *
 * Each thread becomes one row holding the date, subject and HTML body of every
 * message in the thread, in message order. Rows of shorter threads are padded
 * with empty strings so the written block is rectangular.
 *
 * Does nothing when no thread carries the source label.
 */
function getORHPEmails() {
  var oldLabel = GmailApp.getUserLabelByName('NEW ORHP JOBS');
  var newLabel = GmailApp.getUserLabelByName('TEST - PROCESSED');
  var threads = oldLabel.getThreads(0, 10);

  // Without this guard the width lookup below would fail on an empty batch.
  if (threads.length === 0) {
    return;
  }

  var destArray = [];
  var width = 0;
  // Index loops instead of for...in: for...in also walks inherited
  // enumerable properties and yields string keys.
  for (var n = 0; n < threads.length; n++) {
    var msg = threads[n].getMessages();
    var destArrayRow = [];
    for (var m = 0; m < msg.length; m++) {
      destArrayRow.push(msg[m].getDate());
      destArrayRow.push(msg[m].getSubject());
      destArrayRow.push(msg[m].getBody());
    }
    width = Math.max(width, destArrayRow.length);
    destArray.push(destArrayRow);
  }

  // Threads can hold different numbers of messages; pad every row to the
  // widest one so all rows match the column count of the target range.
  for (var r = 0; r < destArray.length; r++) {
    while (destArray[r].length < width) {
      destArray[r].push('');
    }
  }
  Logger.log(destArray);

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sh = ss.getSheetByName('ORHP EMAIL');
  // Measure the target sheet itself; the spreadsheet-level getLastRow()
  // is not tied to 'ORHP EMAIL'.
  if (sh.getLastRow() === 0) {
    sh.getRange(1, 1).setValue('getMessagesWithLabel()RESULTS');
  }
  if (width > 0) {
    sh.getRange(sh.getLastRow() + 1, 1, destArray.length, width).setValues(destArray);
  }

  // Relabel only after the rows are stored, so a failed write leaves the
  // threads under the source label for the next run.
  for (var t = 0; t < threads.length; t++) {
    threads[t].removeLabel(oldLabel).addLabel(newLabel);
  }
}
答案 0 :(得分:1)
我想出来了。以下是我使用的代码的变化:
/**
 * Turns an HTML string into plain text.
 *
 * The markup is parsed with Xml.parse (second argument true) and the
 * document's root element is handed to getTextFromNode, whose result is
 * returned unchanged.
 *
 * @param {string} html Markup to convert.
 * @return {string} Whatever getTextFromNode produces for the root element.
 */
function getTextFromHtml(html) {
  var parsedDocument = Xml.parse(html, true);
  var rootElement = parsedDocument.getElement();
  return getTextFromNode(rootElement);
}
// Counter used to lead list items: 0 means the enclosing list is unordered
// (items get a ' - ' bullet); a positive value is the next ordinal of the
// enclosing ordered list. Starts at 0 so an <li> outside any list is bulleted.
var _itemNum = 0;

/**
 * Recursively converts a parsed XML/HTML node into plain text.
 *
 * The node kind is taken from x.toString(): 'XmlText' nodes contribute
 * x.toXmlString(), 'XmlElement' nodes contribute the text of their child
 * nodes, and any other kind contributes an empty string.
 *
 * Element handling:
 *   - br, p   : a newline is appended after the content.
 *   - ul, ol  : a newline is appended after the content; list items inside
 *               are bulleted (ul) or numbered from 1 (ol).
 *   - li      : starts on a new line with ' - ' or ' N. ' as its lead.
 *   - others  : content only.
 *
 * @param {Object} x Node exposing toString() and, depending on kind,
 *     toXmlString() or getName()/getNodes().
 * @return {string} Plain-text rendering of the node.
 */
function getTextFromNode(x) {
  switch (x.toString()) {
    case 'XmlText':
      return x.toXmlString();
    case 'XmlElement':
      var name = x.getName().getLocalName();
      var pre = '';
      var post = '';
      // Remember the enclosing list's counter so a nested list does not
      // clobber it.
      var enclosingItemNum = _itemNum;
      switch (name) {
        case 'br':
        case 'p':
          post = '\n';
          break;
        case 'ul':
          post = '\n';
          _itemNum = 0;
          break;
        case 'ol':
          post = '\n';
          _itemNum = 1;
          break;
        case 'li':
          // The increment must happen before the children are rendered so a
          // nested list saves the already-advanced ordinal.
          pre = '\n' + (_itemNum === 0 ? ' - ' : (' ' + _itemNum++ + '. '));
          break;
        default:
          break;
      }
      var inner = x.getNodes().map(getTextFromNode).join('');
      if (name === 'ul' || name === 'ol') {
        // Leaving the list: hand the counter back to the enclosing list.
        _itemNum = enclosingItemNum;
      }
      return pre + inner + post;
    default:
      return '';
  }
}
/**
 * Copies up to 10 threads labelled 'NEW ORHP JOBS' into the 'ORHP EMAIL' sheet
 * and then moves those threads to the 'TEST - PROCESSED' label.
 *
 * Each thread becomes one row holding the date, subject and body of every
 * message in the thread, in message order; the HTML body is passed through
 * getTextFromHtml before it is stored. Rows of shorter threads are padded
 * with empty strings so the written block is rectangular.
 *
 * Does nothing when no thread carries the source label.
 */
function getORHPEmails() {
  var oldLabel = GmailApp.getUserLabelByName('NEW ORHP JOBS');
  var newLabel = GmailApp.getUserLabelByName('TEST - PROCESSED');
  var threads = oldLabel.getThreads(0, 10);

  // Without this guard the width lookup below would fail on an empty batch.
  if (threads.length === 0) {
    return;
  }

  var destArray = [];
  var width = 0;
  // Index loops instead of for...in: for...in also walks inherited
  // enumerable properties and yields string keys.
  for (var n = 0; n < threads.length; n++) {
    var msg = threads[n].getMessages();
    var destArrayRow = [];
    for (var m = 0; m < msg.length; m++) {
      destArrayRow.push(msg[m].getDate());
      destArrayRow.push(msg[m].getSubject());
      destArrayRow.push(getTextFromHtml(msg[m].getBody()));
    }
    width = Math.max(width, destArrayRow.length);
    destArray.push(destArrayRow);
  }

  // Threads can hold different numbers of messages; pad every row to the
  // widest one so all rows match the column count of the target range.
  for (var r = 0; r < destArray.length; r++) {
    while (destArray[r].length < width) {
      destArray[r].push('');
    }
  }
  Logger.log(destArray);

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sh = ss.getSheetByName('ORHP EMAIL');
  // Measure the target sheet itself; the spreadsheet-level getLastRow()
  // is not tied to 'ORHP EMAIL'.
  if (sh.getLastRow() === 0) {
    sh.getRange(1, 1).setValue('getMessagesWithLabel() RESULTS');
  }
  if (width > 0) {
    sh.getRange(sh.getLastRow() + 1, 1, destArray.length, width).setValues(destArray);
  }

  // Relabel only after the rows are stored, so a failed write leaves the
  // threads under the source label for the next run.
  for (var t = 0; t < threads.length; t++) {
    threads[t].removeLabel(oldLabel).addLabel(newLabel);
  }
}
感谢Mogsdad在“Remove html formatting when getting Body of a gmail message in javascript”
中给出的回答。