我一直在使用 Google Apps Script 脚本,通过正则表达式解析 Gmail 邮件数据。脚本会读取某个特定 Gmail 标签下来自同一发件人的邮件,如下面的代码所示:
// Adapted from https://gist.github.com/Ferrari/9678772
//https://stackoverflow.com/questions/31345400/extract-info-from-email-body-with-google-scripts
/**
 * Parses the first message of every thread under a Gmail label and appends
 * the extracted transaction fields to a Google Sheet, one row per thread.
 *
 * Threads whose first message has an empty plain-text body are skipped.
 * A field whose label is not found in the body is written as 'No data'.
 *
 * @throws {Error} If the Gmail label or the sheet tab does not exist.
 */
function processInboxToSheet() {
  var LABEL_NAME = "Any label name here";
  var SPREADSHEET_URL = "Any google sheet URL in here ";
  var SHEET_NAME = 'Required Sheet tab name here';
  var NO_DATA = 'No data';

  // One pattern per output column, listed in column order. Capture group 1
  // is the value that ends up in the sheet. Implement your own parsing rules
  // by editing this table.
  var fieldPatterns = [
    /Transaction Reference:\s*([A-Za-z0-9@.,-]+)/,
    /Transaction Date:\s*([A-Za-z0-9@.,-]+)/,
    /Transaction Charges:\s*([A-Za-z0-9@.,-]+)/,
    // Two tokens so that "First Last" is captured. The second character
    // class used to contain a stray '[' that let a literal bracket through.
    /Beneficiary Name:\s*([A-Za-z0-9@.,'-]+\s*[A-Za-z0-9@.,'-]+)/,
    /Exchange Rate:\s*([A-Za-z0-9@.,-]+)/,
    /Beneficiary Amount:\s*([A-Za-z0-9@.,-]+)/,
    /Total Amount Debited from your card:\s*([A-Za-z0-9@.,-]+)/
  ];

  // Returns the trimmed first capture group of `pattern` in `text`,
  // or NO_DATA when the pattern does not match or captures nothing.
  function extractField(text, pattern) {
    var match = text.match(pattern);
    return (match && match[1]) ? match[1].trim() : NO_DATA;
  }

  var label = GmailApp.getUserLabelByName(LABEL_NAME);
  if (!label) {
    // getUserLabelByName yields null for an unknown label; fail with a
    // message that names the label instead of an opaque TypeError.
    throw new Error('Gmail label not found: ' + LABEL_NAME);
  }
  var threads = label.getThreads();

  var spreadsheet = SpreadsheetApp.openByUrl(SPREADSHEET_URL);
  var sheet = spreadsheet.getSheetByName(SHEET_NAME);
  if (!sheet) {
    throw new Error('Sheet tab not found: ' + SHEET_NAME);
  }

  for (var i = 0; i < threads.length; i++) {
    // Only the first message of each thread is parsed.
    var content = threads[i].getMessages()[0].getPlainBody();
    if (!content) {
      continue;
    }

    var row = [];
    for (var j = 0; j < fieldPatterns.length; j++) {
      row.push(extractField(content, fieldPatterns[j]));
    }
    sheet.appendRow(row);

    // Pause between writes; presumably to stay under Apps Script rate
    // limits (the original author's stated concern) - TODO confirm.
    Utilities.sleep(500);
  }
}
脚本一直工作正常,直到邮件开始以富文本(HTML)格式发送。查看邮件的原始内容,其头部信息如下:
Content-Type: text/html; charset=utf-8
Content-Transfer-Encoding: base64
我猜测是函数 getPlainBody() 无法处理这种格式的邮件,但不知道该如何解决这个问题?
感谢您的阅读。