错误从Gmail获取pdf附件作为文本

时间:2017-07-31 15:30:10

标签: pdf google-apps-script gmail email-attachments drive

我需要帮助解决此代码问题。我得到的错误没有记录。

启用Api并遵循说明 原帖: Get pdf-attachments from Gmail as text

我收到错误:

  1. var blob = attachments[0].getAs(MimeType.PDF);
  2. 不支持从application / octet-stream转换为application / pdf 。 (第16行,文件“bla bla”)

    原始代码不会改变:

     /**
      * Get messages labeled 'templabel', and send myself the text contents of
      * pdf attachments in new emails.
      */
     function myFunction() {
    
       var threads = GmailApp.search('label:invoices-parsed');
       var threadsMessages = GmailApp.getMessagesForThreads(threads);
    
       for (var thread = 0; thread < threadsMessages.length; ++thread) {
         var message = threadsMessages[thread][0];
         var messageBody = message.getBody();
         var messageSubject = message.getSubject();
         var attachments = message.getAttachments();
    
         var blob = attachments[0].getAs(MimeType.PDF);
         var filetext = pdfToText( blob, {keepTextfile: false} );
    
         GmailApp.sendEmail(Session.getActiveUser().getEmail(), messageSubject, filetext);
       }
     }
    
     /**
      * See gist: https://gist.github.com/mogsdad/e6795e438615d252584f
      *
      * Convert pdf file (blob) to a text file on Drive, using built-in OCR.
      * By default, the text file will be placed in the root folder, with the same
      * name as source pdf (but extension 'txt'). Options:
      *   keepPdf (boolean, default false)     Keep a copy of the original PDF file.
      *   keepGdoc (boolean, default false)    Keep a copy of the OCR Google Doc file.
      *   keepTextfile (boolean, default true) Keep a copy of the text file.
      *   path (string, default blank)         Folder path to store file(s) in.
      *   ocrLanguage (ISO 639-1 code)         Default 'en'.
      *   textResult (boolean, default false)  If true and keepTextfile true, return
      *                                        string of text content. If keepTextfile
      *                                        is false, text content is returned without
      *                                        regard to this option. Otherwise, return
      *                                        id of textfile.
      *
      * @param {blob}   pdfFile    Blob containing pdf file
      * @param {object} options    (Optional) Object specifying handling details
      *
      * @returns {string}          id of text file (default) or text content
      */
     function pdfToText ( pdfFile, options ) {
       // Ensure Advanced Drive Service is enabled
       try {
         Drive.Files.list();
       }
       catch (e) {
         throw new Error( "To use pdfToText(), first enable 'Drive API' in Resources > Advanced Google Services." );
       }
    
       // Set default options
       options = options || {};
       options.keepTextfile = options.hasOwnProperty("keepTextfile") ? options.keepTextfile : true;
    
       // Prepare resource object for file creation
       var parents = [];
       if (options.path) {
         parents.push( getDriveFolderFromPath (options.path) );
       }
       var pdfName = pdfFile.getName();
       var resource = {
         title: pdfName,
         mimeType: pdfFile.getContentType(),
         parents: parents
       };
    
       // Save PDF to Drive, if requested
       if (options.keepPdf) {
         var file = Drive.Files.insert(resource, pdfFile);
       }
    
       // Save PDF as GDOC
       resource.title = pdfName.replace(/pdf$/, 'gdoc');
       var insertOpts = {
         ocr: true,
         ocrLanguage: options.ocrLanguage || 'en'
       }
       var gdocFile = Drive.Files.insert(resource, pdfFile, insertOpts);
    
       // Get text from GDOC  
       var gdocDoc = DocumentApp.openById(gdocFile.id);
       var text = gdocDoc.getBody().getText();
    
       // We're done using the Gdoc. Unless requested to keepGdoc, delete it.
       if (!options.keepGdoc) {
         Drive.Files.remove(gdocFile.id);
       }
    
       // Save text file, if requested
       if (options.keepTextfile) {
         resource.title = pdfName.replace(/pdf$/, 'txt');
         resource.mimeType = MimeType.PLAIN_TEXT;
    
         var textBlob = Utilities.newBlob(text, MimeType.PLAIN_TEXT, resource.title);
         var textFile = Drive.Files.insert(resource, textBlob);
       }
    
       // Return result of conversion
       if (!options.keepTextfile || options.textResult) {
         return text;
       }
       else {
         return textFile.id
       }
     }
    
     // From: http://ramblings.mcpher.com/Home/excelquirks/gooscript/driveapppathfolder
     function getDriveFolderFromPath (path) {
       return (path || "/").split("/").reduce ( function(prev,current) {
         if (prev && current) {
           var fldrs = prev.getFoldersByName(current);
           return fldrs.hasNext() ? fldrs.next() : null;
         }
         else { 
           return current ? null : prev; 
         }
       },DriveApp.getRootFolder()); 
     }
    

1 个答案:

答案 0 :(得分:2)

尝试将附件的contentType设置为“application / pdf”。

var attachments = message.getAttachments();
attachments[0].setContentType("appl‌​ication/pdf");