Google Apps脚本,是从外部电子表格中检索数据的最快方法

时间:2014-12-05 21:45:24

标签: javascript google-apps-script google-sheets

我正在尝试将多个电子表格(~100)中的数据加载到单个电子表格中,但是当我尝试这样做时,我的脚本会超时。打开每个电子表格似乎需要很长时间。有什么方法可以加快速度或解决问题吗?

以下是我用来打开每个电子表格的内容

// Module-level state shared by the functions below. These are top-level
// statements, so they are evaluated whenever the script is loaded, before any
// function in this file runs.
// We set the current spreadsheet to master and get the current date.
var master = SpreadsheetApp.getActive();
var masterSheet = master.getSheetByName('Master'); // destination sheet for the merged partner rows
var users = master.getEditors(); // NOTE(review): not referenced in the visible code — confirm it is still needed
var today = new Date(); // compared against each row's promo end date in retrievePartnerData()

// Installs the custom "Submissions Menu" in the spreadsheet UI. Its single
// entry, "Update Data", runs retrievePartnerData.
function onOpen() {
  var activeBook = SpreadsheetApp.getActiveSpreadsheet();

  var updateItem = {};
  updateItem.name = "Update Data";
  updateItem.functionName = "retrievePartnerData";

  activeBook.addMenu("Submissions Menu", [updateItem]);
}


/**
 * Rebuilds the 'Master' sheet from every partner spreadsheet whose URL is
 * listed in column B of the 'Partner Sheet Collection' sheet.
 *
 * For each partner spreadsheet this function:
 *   1. overwrites A1:Z100 of its 'Promotional Outline' sheet with the master copy,
 *   2. hides four columns of that sheet, starting at column K,
 *   3. collects, from every sheet, the rows below the header whose first cell
 *      is non-empty and whose sixth cell is either later than `today` or the
 *      placeholder "[Please Enter]".
 *
 * Each collected row gets its second cell replaced by the studio name (cell A1
 * of the partner's first sheet) and the source sheet name prepended. All rows
 * are written to 'Master' in a single setValues() call, starting at row 2.
 *
 * Relies on the file-level variables `master`, `masterSheet` and `today`.
 *
 * @throws {Error} If a partner spreadsheet has no 'Promotional Outline' sheet.
 */
function retrievePartnerData() {
  // Clear only the rows that hold data below the header. A sheet with no
  // content reports 0 rows/columns, which is not a valid range size.
  var masterLastRow = masterSheet.getLastRow();
  var masterLastColumn = masterSheet.getLastColumn();
  if (masterLastRow > 1 && masterLastColumn > 0) {
    masterSheet.getRange(2, 1, masterLastRow - 1, masterLastColumn).clear();
  }
  masterSheet.hideSheet();

  // Promo Outline values from the internal sheet, pushed to every partner.
  var promoRange = master.getSheetByName("Promotional Outline").getRange("A1:Z100").getValues();

  var sheetPartnerArray = [];

  // One single-cell row per entry of column B: the partner spreadsheet URLs.
  var urlRows = master.getSheetByName('Partner Sheet Collection').getRange("B:B").getValues();

  urlRows.forEach(function(e) {
    var url = e[0];
    // Skip blanks, the "Url" header cell and anything that is not text.
    if (typeof url !== "string" || url === "" || url === "Url") {
      return;
    }

    var ss = SpreadsheetApp.openByUrl(url);
    var sheets = ss.getSheets();
    var studioName = sheets[0].getRange("A1").getValue();

    // Updates the Promotional Outline sheet in the partner spreadsheet.
    var promoSheet = ss.getSheetByName("Promotional Outline");
    if (!promoSheet) {
      throw new Error('Partner spreadsheet ' + url + ' has no "Promotional Outline" sheet');
    }
    promoSheet.getRange("A1:Z100").setValues(promoRange);

    // Hides 4 columns starting at column 11 (K through N).
    // NOTE(review): the earlier comment said "K to Z", which would be
    // hideColumns(11, 16) — confirm which one is intended.
    promoSheet.hideColumns(11, 4);

    sheets.forEach(function(f) {
      var lastRow = f.getLastRow();
      var lastColumn = f.getLastColumn();
      // Nothing below the header (or a completely empty sheet): no rows to
      // read, and the range size would be invalid.
      if (lastRow < 2 || lastColumn < 1) {
        return;
      }

      var sheetName = f.getSheetName();
      var data = f.getRange(2, 1, lastRow - 1, lastColumn).getValues();

      for (var j = 0; j < data.length; j++) {
        var sheetRow = data[j];
        var hasKey = sheetRow[0] !== "";
        // Keep promos whose end date is still in the future or not yet entered.
        var isOpen = sheetRow[5] > today || sheetRow[5] == "[Please Enter]";
        if (hasKey && isOpen) {
          sheetRow[1] = studioName;
          sheetRow.unshift(sheetName); // the sheet name becomes the first column
          sheetPartnerArray.push(sheetRow);
        }
      }
    });
  });

  // Nothing qualified: leave the cleared Master as is.
  if (sheetPartnerArray.length === 0) {
    return;
  }

  // setValues() needs a rectangular array, but partner sheets may differ in
  // width, so pad every row to the widest one.
  var width = 0;
  sheetPartnerArray.forEach(function(r) {
    if (r.length > width) {
      width = r.length;
    }
  });
  sheetPartnerArray.forEach(function(r) {
    while (r.length < width) {
      r.push("");
    }
  });

  masterSheet.getRange(2, 1, sheetPartnerArray.length, width).setValues(sheetPartnerArray);
}

谢谢!

1 个答案:

答案 0 :(得分:3)

一种常见的做法是设置一个触发器,在将来某个时间(超过最大执行时间之后)重新启动您的大型作业。这样,大型作业每次尽可能多地完成工作(或在某个合适的逻辑点主动停止),然后要么被系统终止,要么自行安静退出。无论哪种情况,它都会在不久之后被重新启动,并从中断处继续工作。

Patt0 已经把这个想法做成了一个优雅的成品,并提供了一个库(providing a library),您可以将其添加到自己的脚本中。稍作调整,您应该就能把 retrievePartnerData() 改造成批处理作业。

由于您已经有了一个菜单,而且 retrievePartnerData() 需要遍历许多电子表格,您还可以用另一种方式突破这一限制:让每次迭代(或者更好,每一组迭代)在单独的服务器脚本实例中完成。

这一技术出现在 What happens when I "sleep" in GAS ? (execution time limit workaround) 一文中。

How to poll a Google Doc from an add-on 中也有类似的做法。在那个答案中,UI 客户端使用计时器反复调用服务器函数。而在这里,迭代由工作量驱动,而不是由时间驱动。下面这个在浏览器中运行的客户端函数会不断调用服务器,直到没有剩余工作为止:

  /**
   * Call the server-side 'serverProcess' function until there's no more work.
   *
   * Every successful call hands back a status object `{ done: boolean }`.
   * While `done` is false this function dispatches itself again; once it is
   * true the timer is stopped and the completion message is shown. If the
   * server call fails, the error is displayed, the timer is stopped and the
   * start button is re-enabled so the user can retry.
   */
  function dispatchWork(){
    google.script.run
     .withSuccessHandler(                //<<<< if last call was good
       // After each interval, decide what to do next
       function(workis) {
         if (!workis.done) {             //<<<<< check if we're done
           // There's more work to do, keep going.
           dispatchWork();
           return;
         }
         // All done. Stop timer
         window.runningProcess = false;
         stopTimer();
         $('#start-process').hide();
         $("#final").html(' <h2>Processing complete!</h2>');
       })
     .withFailureHandler(
       function(msg, element) {         //<<<<<< do this if error
         window.runningProcess = false;
         stopTimer();
         showError(msg, $('#button-bar'));
         // No user object is registered with withUserObject(), so `element`
         // is undefined here; re-enable the start button directly.
         $('#start-process').prop('disabled', false);
         if (element) {
           element.disabled = false;
         }
       })
     .serverProcess();                  //<<<<< call server function
  };

在您的情况下,首先需要重构 retrievePartnerData(),使客户端每次调用它时只处理一个电子表格(或一组电子表格)。毫无疑问,您已经花了不少时间把 map 循环写得干净利落,把它拆开会有些痛苦,但这是值得的。

以下绑定到电子表格的脚本可以按您的需要进行调整。它由一个菜单项、一个简单的用户界面,以及客户端脚本(JavaScript + jQuery)和服务器端脚本(Google Apps 脚本)组成,二者相互配合,按时间间隔分段控制工作的执行。

screenshot

控制数据位于 "SourceSheets" 选项卡中,结果将被复制到 "Master" 选项卡。

screenshot

Code.gs

// Script-scoped property store. The functions below use it to persist the
// 'work-queue' JSON string between serverProcess() invocations.
var properties = PropertiesService.getScriptProperties();

// Installs the custom "Big Job" menu in the spreadsheet UI. Its single entry,
// "Update Data", runs updateData.
function onOpen() {
  var activeBook = SpreadsheetApp.getActiveSpreadsheet();

  var updateItem = {};
  updateItem.name = "Update Data";
  updateItem.functionName = "updateData";

  activeBook.addMenu("Big Job", [updateItem]);
}

/**
 * Menu handler: builds the "Conductor" HTML page as a 250x150 dialog titled
 * "What 5 minute limit?" and shows it on the active spreadsheet.
 */
function updateData () {
  var page = HtmlService.createHtmlOutputFromFile("Conductor");
  var sized = page.setHeight(150).setWidth(250);
  var titled = sized.setTitle("What 5 minute limit?");

  SpreadsheetApp.getActiveSpreadsheet().show(titled);
}

/**
 * Server entry point invoked repeatedly by the client. Each invocation works
 * through the persisted queue of spreadsheet IDs until either the queue is
 * empty or MAX_INTERVAL (3.5 minutes) has elapsed since the call started.
 * When no queue is stored (or it is empty) a new one is built by prepareWork().
 * The queue is re-persisted after every processed item.
 *
 * @returns {Object}      Status { done: boolean }; done is true once the
 *                        work queue has been emptied.
 */
function serverProcess() {
  var MAX_INTERVAL = (3.5 * 60);  // minutes * seconds

  // Current wall-clock time in whole seconds.
  var nowSeconds = function() {
    return Math.round(new Date() / 1000);
  };
  var startedAt = nowSeconds();

  // Restore the persisted queue; fall back to an empty one.
  var stored = properties.getProperty('work-queue');
  var queue = JSON.parse(stored ? stored : '[]');
  if (queue.length == 0) {
    queue = prepareWork();
  }

  // Process items one at a time while there is time left in this interval.
  while ((nowSeconds() - startedAt) < MAX_INTERVAL && queue.length > 0) {
    processSheet(queue.shift());
    properties.setProperty('work-queue', JSON.stringify(queue));
  }

  // Tell the client whether another call is needed.
  return { done : (queue.length == 0) };
}

/**
 * Set up work queue & clear Master sheet, ready to import data from source sheets.
 *
 * The queue is read from the 'SourceSheets' tab: the first row is treated as
 * headers and the column titled 'Spreadsheet ID' supplies one work item per
 * row. Rows whose ID cell is blank are skipped. The configuration is validated
 * before the Master sheet is touched, so a broken 'SourceSheets' tab does not
 * wipe existing results.
 *
 * @return {String[]}             work queue
 * @throws {Error} If 'SourceSheets' has no 'Spreadsheet ID' header.
 */
function prepareWork() {
  var ss = SpreadsheetApp.getActive();

  // Build work queue
  var data = ss.getSheetByName('SourceSheets')      // get all data
               .getDataRange().getValues();
  var headers = data.shift() || [];                 // take headers off it
  var ssIDcol = headers.indexOf('Spreadsheet ID');  // find column with work
  if (ssIDcol === -1) {
    throw new Error("'SourceSheets' has no 'Spreadsheet ID' header column");
  }

  var queue = [];
  for (var i = 0; i < data.length; i++) {
    var id = data[i][ssIDcol];
    // Blank cells are not work items; queueing them would only fail later.
    if (id !== '' && id != null) {
      queue.push(id);
    }
  }

  // Remove every row below the header row of Master.
  var masterSheet = ss.getSheetByName('Master');
  var rowsToDelete = masterSheet.getMaxRows() - 1;
  if (rowsToDelete > 0) {
    masterSheet.deleteRows(2, rowsToDelete);
  }

  // Persist the work queue as a scriptProperty
  properties.setProperty('work-queue', JSON.stringify(queue));
  return queue;
}

/**
 * Do whatever work item we need. In this example, we'll import all data from
 * the source sheet and append it to our Master.
 *
 * The first row of the source's 'Sheet1' tab is treated as a header and
 * dropped. The remaining rows are appended after the last row of 'Master'
 * that has content (never above row 2, which keeps row 1 as the header).
 * A source with no rows below its header contributes nothing.
 *
 * @param {String}  ssID       Source spreadsheet ID
 * @throws {Error} If the source spreadsheet has no 'Sheet1' tab.
 */
function processSheet(ssID) {
  var masterSheet = SpreadsheetApp.getActive().getSheetByName('Master');
  var sourceSheet = SpreadsheetApp.openById(ssID).getSheetByName('Sheet1');
  if (!sourceSheet) {
    throw new Error("Spreadsheet " + ssID + " has no 'Sheet1' tab");
  }

  // Demo only: simulates a slow work item so the job exceeds the execution
  // limit. You probably don't want to do this... just wasting time.
  Utilities.sleep(60000);

  var sourceData = sourceSheet.getDataRange().getValues().slice(1);
  if (sourceData.length === 0) {
    return;  // header-only (or empty) source: nothing to append
  }

  // insertRowsAfter() needs an existing row to anchor to, so fall back to
  // row 1 when Master has no content yet.
  var anchorRow = Math.max(masterSheet.getLastRow(), 1);
  masterSheet.insertRowsAfter(anchorRow, sourceData.length);

  var destRange = masterSheet.getRange(anchorRow + 1, 1, sourceData.length, sourceData[0].length);
  destRange.setValues(sourceData);
}

Conductor.html

<link rel="stylesheet" href="https://ssl.gstatic.com/docs/script/css/add-ons.css">
<!-- The CSS package above applies Google styling to buttons and other elements. -->


<!-- Dialog body: a status area plus the button that starts the batch job. -->
<div id="form-div" class="sidebar branding-below">
  <!-- #final: the client script writes the completion message here -->
  <span id="final"></span>
  <form>
    <!-- #button-bar: error messages are inserted after this element -->
    <div class="block" id="button-bar">
      <!-- #start-process: its click handler is attached by the script on document load -->
      <button class="blue" id="start-process">Start processing</button>
    </div>
  </form>
</div>

<!-- #elapsed: refreshed by the client-side tick() function -->
<div class="bottom">
  Elapsed processing time: <span id="elapsed">--:--:--</span>
</div>

<script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js">
</script>

<script>
  /**
   * On document load, assign the click handler to the
   * "Start processing" button.
   */
  $(function() {
    // assign click handler(s)
    $('#start-process').click(startProcess);

  });

  /**
   * Call the server-side 'serverProcess' function until there's no more work.
   *
   * Every successful call hands back a status object `{ done: boolean }`.
   * While `done` is false this function dispatches itself again; once it is
   * true the timer is stopped and the completion message is shown. If the
   * server call fails, the error is displayed, the timer is stopped and the
   * start button is re-enabled so the user can retry.
   */
  function dispatchWork(){
    google.script.run
     .withSuccessHandler(
       // After each interval, decide what to do next
       function(workis) {
         if (!workis.done) {
           // There's more work to do, keep going.
           dispatchWork();
           return;
         }
         // All done. Stop timer
         window.runningProcess = false;
         stopTimer();
         $('#start-process').hide();
         $("#final").html(' <h2>Processing complete!</h2>');
       })
     .withFailureHandler(
       function(msg, element) {
         window.runningProcess = false;
         stopTimer();
         showError(msg, $('#button-bar'));
         // No user object is registered with withUserObject(), so `element`
         // is undefined here; re-enable the start button directly.
         $('#start-process').prop('disabled', false);
         if (element) {
           element.disabled = false;
         }
       })
     .serverProcess();
  };

  /**
   * Click handler for the "Start processing" button: disables the button,
   * clears any previous error, starts the elapsed-time display and kicks off
   * the first server call.
   *
   * The button lives inside a <form> and has no explicit type, so a click
   * would also submit the form; the default action is cancelled to keep the
   * dialog on screen while the job runs.
   *
   * @param {Event} [event] The click event, when invoked as an event handler.
   */
  function startProcess(event) {
    if (event && typeof event.preventDefault === 'function') {
      event.preventDefault();  // Do not submit the enclosing form
    }
    this.disabled = true;  // Disable the button
    $('#error').remove();  // Clear previous error messages, if any
    startTimer();          // Start a work timer, for display to user
    window.runningProcess = true;
    dispatchWork();        // Start our work on the server
  }


  // Timer adapted from http://codingforums.com/javascript-programming/159873-displaying-elapsed-time.html

  /**
   * Resets the elapsed-seconds counter and kicks off the tick function,
   * which is then scheduled once per second.
   *
   * A ticker left over from an earlier run is cleared first; otherwise a
   * restart would leave two intervals running and the display would advance
   * twice per second.
   */
  function startTimer( )
  {
    if (window.ticker != null) {
      clearInterval(window.ticker);
    }
    window.seconds = -1;  // tick() increments before displaying, so the first value shown is 0
    window.ticker = setInterval(tick, 1000);
    tick( );
  }

  /**
   * Cancels the interval whose handle is stored in window.ticker.
   */
  function stopTimer()
  {
    var activeTicker = window.ticker;
    clearInterval(activeTicker);
  }

  /*
   * Advances window.seconds by one and renders it into #elapsed
   * as zero-padded HH:MM:SS.
   */
  function tick( )
  {
    window.seconds++;

    var elapsed = window.seconds;
    var hours = Math.floor( elapsed / 3600 );
    var remainder = elapsed % 3600;
    var fields = [ hours, Math.floor( remainder / 60 ), remainder % 60 ];

    // Left-pad each field to two digits.
    var labels = [];
    for (var i = 0; i < fields.length; i++) {
      labels.push( ( fields[i] < 10 ? "0" : "" ) + fields[i] );
    }

    $("#elapsed").text( labels.join(":") );
  }

  /**
   * Inserts a div that contains an error message after a given element.
   *
   * The message is set as text rather than concatenated into the markup, so
   * characters such as "<" or "&" in it are displayed literally instead of
   * being parsed as HTML.
   *
   * @param msg The error message to display.
   * @param element The element after which to display the error.
   */
  function showError(msg, element) {
    var div = $('<div id="error" class="error"></div>').text(String(msg));
    $(element).after(div);
  }
</script>