将带有富文本格式的 Gmail 邮件数据解析到 Google 表格

时间:2017-07-15 18:52:47

标签: javascript regex google-sheets gmail

我一直在使用 Google Apps Script 脚本,通过正则表达式解析 Gmail 邮件中的数据。脚本会读取某个特定 Gmail 标签下来自同一发件人的邮件,如下面的代码所示:

   //  Adapted from https://gist.github.com/Ferrari/9678772
  //https://stackoverflow.com/questions/31345400/extract-info-from-email-body-with-google-scripts

/**
 * Returns the first capture group of `pattern` found in `content`, trimmed.
 *
 * @param {string} content - Plain-text message body to search.
 * @param {RegExp} pattern - Pattern whose first capture group holds the value.
 * @returns {string} The trimmed capture, or 'No data' when the pattern does
 *     not match or the capture is empty.
 */
function extractField_(content, pattern) {
  var match = content.match(pattern);
  return (match && match[1]) ? match[1].trim() : 'No data';
}

/**
 * Parses transaction e-mails under a Gmail label and appends one row per
 * thread to a Google Sheet.
 *
 * For every thread under the label, the plain-text body of the thread's
 * FIRST message is scanned with one regular expression per column. Threads
 * whose first message has an empty plain body are skipped. Each field that
 * cannot be found is written as 'No data'.
 *
 * Row layout: Transaction Reference, Transaction Date, Transaction Charges,
 * Beneficiary Name, Exchange Rate, Beneficiary Amount, Total Amount Debited.
 *
 * @throws {Error} If the configured Gmail label cannot be found.
 */
function processInboxToSheet() {
  var LABEL_NAME = "Any label name here";
  var SPREADSHEET_URL = "Any google sheet URL in here ";
  var SHEET_NAME = 'Required Sheet tab name here';

  // One pattern per output column, in column order. Values are a single run
  // of letters, digits and "@.,-" (so they stop at the first whitespace),
  // except the beneficiary name, which may span two such runs (and also
  // allows an apostrophe).
  var FIELD_PATTERNS = [
    /Transaction Reference:\s*([A-Za-z0-9@.,-]+)/,
    /Transaction Date:\s*([A-Za-z0-9@.,-]+)/,
    /Transaction Charges:\s*([A-Za-z0-9@.,-]+)/,
    // The second character class previously contained a stray "[" which let
    // a literal "[" leak into the captured name.
    /Beneficiary Name:\s*([A-Za-z0-9@.,'-]+\s*[A-Za-z0-9@.,'-]+)/,
    /Exchange Rate:\s*([A-Za-z0-9@.,-]+)/,
    /Beneficiary Amount:\s*([A-Za-z0-9@.,-]+)/,
    /Total Amount Debited from your card:\s*([A-Za-z0-9@.,-]+)/
  ];

  var label = GmailApp.getUserLabelByName(LABEL_NAME);
  if (!label) {
    // Fail with a readable message instead of a TypeError on label.getThreads().
    throw new Error('Gmail label not found: ' + LABEL_NAME);
  }
  var threads = label.getThreads();

  var spreadsheet = SpreadsheetApp.openByUrl(SPREADSHEET_URL);
  var qs_sheet = spreadsheet.getSheetByName(SHEET_NAME);

  for (var i = 0; i < threads.length; i++) {
    // Only the first message of each thread is inspected.
    var messages = threads[i].getMessages();
    var content = messages[0].getPlainBody();

    if (!content) {
      continue;
    }

    var row = [];
    for (var f = 0; f < FIELD_PATTERNS.length; f++) {
      row.push(extractField_(content, FIELD_PATTERNS[f]));
    }
    qs_sheet.appendRow(row);

    // Pause between writes -- presumably to stay under Apps Script rate
    // limits (the original author's note mentions a script limit).
    Utilities.sleep(500);
  }
}

脚本一直工作正常,直到邮件开始以富文本格式发送。查看邮件的原始内容后,发现其头部信息如下:

Content-Type: text/html; charset=utf-8 Content-Transfer-Encoding: base64

我猜测是函数 getPlainBody() 无法处理这种格式的邮件,但不知道该如何解决这个问题?

感谢您阅读

0 个答案:

没有答案