好吧,我试图做标题中描述的内容。两个电子表格只有一张是我比较的表格。一个电子表格是另一个的更新,所以我试图只获取新内容。 (如果它是fc(dos命令)之类的函数,这很容易......)
在做了一些搜索之后,我有了下面的脚本,它应该适用于大多数情况,每个工作表使用数组。
function test() {
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
var newarray = getNewData(array,old_array);
Logger.log('there are ' + newarray.length + 'different rows');
}
function getNewData(array1,array2){
var diff =array2;
for (var i = 0; i<array1.length; i++){
var duplicate = false;
for (var j = 0;j<diff.length;j++){
if (array1[i].join() == diff[j].join()){
Logger.log('duplicated line found on rows ' + i + ':' + j);
diff.splice(j,1);
var duplicate= true;
break;
}
}
if (duplicate==false) {
Logger.log('not duplicated line found on row ' + i);
diff.push(array1[i]);
}
}
return diff;
}
问题是文件太大,差不多有30000行,因此脚本超出了5分钟的执行限制。
有没有办法改善这一点,例如,消除内部for循环? 或者有部分方法可以做到这一点?比如前5000行,依此类推。
此致
编辑:在稍微分析电子表格后,我发现每行都有一个ID,所以现在我只能将搜索集中在每个电子表格的一列中。所以这是我的新实现:function test(){
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
//The COlumn has an indicator, so i search for that. I don't control the formatting of the files, so i search in both spreadsheet for the indicator
var searchString = 'NAME';
for (var i = 0; i < old_array.length; i++) {
for (var j = 0; j < old_array[i].length; j++) {
if (old_array[i][j] == searchString) {
var Row_old = i+1;
var Column_old = j;
break;
}
}
if (Row_old != undefined){
break;
}
}
for (var i = 0; i < array.length; i++) {
for (var j = 0; j < array[i].length; j++) {
if (array[i][j] == searchString) {
var Row = i+1;
var Column = j;
break;
}
}
if (Row != undefined){
break;
}
}
Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);
var diff_index =[];
var row_ind = 0;
for (var i=Row;i<array.length;i++){
Logger.log(i);
var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
if (existe==-1){
Logger.log(row_ind+'!!!');
diff_index[row_ind]=i;
row_ind++;
}
}
Logger.log(diff_index);
}
这仍然没有时间......我现在将尝试合并你的评论。
答案 0 :(得分:1)
你的脚本有一些主要的瓶颈,可以大大减慢它的速度:
我们可以通过以下方式规避这些问题:
我们将使用ArrayLib进行排序(我希望它是一种快速排序算法)。
让我们从一个函数开始,找到第一列与值匹配的第一行(当前行的第一列):
function firstRowMatchingCol1(target, lookupRange) {
var min = 0;
var max = lookupRange.length - 1;
var guess;
var guessVal;
while(min <= max) {
guess = (min + max) / 2 | 0;
guessVal = lookupRange[guess][0];
if (guessVal < target) {
min = guess + 1;
} else if (guessVal > target) {
max = guess - 1;
} else {
while (guess > 0 && lookupRange[guess - 1][0] === target) {
guess -= 1;
}
return guess;
}
}
return -1;
}
现在我们可以线性地遍历每一行并检查列是否匹配,直到第一列不再匹配为止。
function matchExists(row, lookupRange) {
var index = firstRowMatchingCol1(row[0], lookupRange);
if (index === -1) {return false;}
while (index < lookupRange.length && lookupRange[index][0] === row[0]) {
for (var col = 1; col < row.length; col++) {
if (row[col] !== lookupRange[index][col]) {break;}
if (col === row.length - 1) {return true;} // This only works if the ranges are at least two columns wide but if they are one column wide you can just check if index > -1
}
index += 1;
}
return false;
}
最后我们可以得到这样的重复:
function getNonDuplicates(r1, r2) {
r2 = ArrayLib.sort(r2, 0, true);
return r1.filter(function(row) {return !matchExists(row, r2);});
}
像mTorres&#39;这是未经测试的代码
答案 1 :(得分:0)
我提出的解决方案是&#34; hack&#34;大约是时间限制。但是如果你想要一个更干净的解决方案,你可以,如果可能的话,通过以某种方式订购数组来重新组织并使代码更高效。
你没有在array1和array2中指定数据,如果行有某种ID字段你可以通过这个ID排序并检查array1上的第i行和array2上的第i行而不是比较array1中的每一行array2中的每一行(30000行效率极低)。
如果您的数据没有用于对行进行排序的ID字段,那么您可以根据我提出的解决方案做些什么:为array1上的每个比较行添加一个轨道。当运行达到时间限制时,你再次运行该函数,但是从最后一个比较行开始(你知道哪个是因为你将跟踪比较的行),当第二个运行超时你重复时,等等。
每当你进行比较时,你会问它是否是第一次运行(或者使用布尔值 - 我更愿意问用户,这样你就不会忘记改变布尔值),如果它和&#39 #39;首次运行时,删除跟踪 如果它不是第一次运行,那么您将从最后一个跟踪行的下一个开始,所以基本上会继续您的脚本结束。我一直在使用这种技术,效果很好。
在代码中(未经测试,因此在使用真实数据运行之前检查一下):
/**
* Only checks if it's the first run and calls the real work function
*/
function test() {
var firstRun = "yes" === Browser.msgBox("Question", "Is this the first run?", Browser.Buttons.YES_NO);
doTest(firstRun);
}
/**
* Gets the data of the 2 spreadsheets and also the starting
* row
*/
function doTest(firstRun) {
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId = Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
/**
* Here is the code to create the tracking hability
*/
var strartFromRow = 0; // 0 because row 1 is array 0 index when you getValues();
var trackSheet = old_spreadsheet.getSheetByName("Tracking");
if (trackSheet === null) {
trackSheet = old_spreadsheet.insertSheet("Tracking");
}
if (firstRun) {
trackSheet.getRange("A:A").clearContent(); // make sure there no row is tracked yet
}
else {
// we have to continue from the previous row, keep in mind you're making the comparison
// with array which is 0 based, but sheet is 1 based, but you want the next one so getLasRow()
// should be the first item to compare on your array
strartFromRow = trackSheet.getLastRow();
}
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
// when you call the DIFF function, pass the tracking sheet and the start Row
var newarray = getNewData(array,old_array, trackSheet, startFromRow);
Logger.log('there are ' + newarray.length + 'different rows');
}
/**
* Creates a diff array using array1 and array2
* It marks each element on array1 once it has checked if it's in array2
*/
function getNewData(array1, array2, trackingSheet, startFromRow){
var logRow = trackingSheet.getLastRow();
var diff = array2;
for (var i = startFromRow; i < array1.length; i++){
var duplicate = false;
for (var j = 0; j < diff.length;j++){
if (array1[i].join() == diff[j].join()){
Logger.log('duplicated line found on rows ' + i + ':' + j);
diff.splice(j,1);
duplicate = true;
break;
}
}
if (duplicate === false) {
Logger.log('not duplicated line found on row ' + i);
diff.push(array1[i]);
}
trackingSheet.getRange(logRow++, 1).setValue("Checked!"); // Mark i row as checked
}
return diff;
}
答案 2 :(得分:0)
最后,我决定选择缓存服务选项,这是代码,我正在测试它,看看我是否继续使用它。
function getNewData() {
//deleting triggers
var triggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < triggers.length; i++) {
if (triggers[i].getHandlerFunction()=='getNewData'){
ScriptApp.deleteTrigger(triggers[i]);
}
}
//max running time = 5.5 min
var MAX_RUNNING_TIME = 330000;
var startTime= (new Date()).getTime();
//get cache
var cache = CacheService.getUserCache();
var downloaded =JSON.parse(cache.get('downloaded'));
var compared =JSON.parse(cache.get('compared'));
//start
if (downloaded==1 && compared!=1){
//folder
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId = licitacionesFolder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_array = old_sheet.getDataRange().getValues();
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(old_file.getId());
var sheet = spreadsheet.getSheets()[0];
var array = sheet.getDataRange().getValues();
Logger.log(array.length+'::'+old_array.length);
// Column
var searchString = 'NAME';
var RC = getColumn(array,searchString);
var Row = RC.Row;
var Column = RC.Column;
var RC = getColumn(old_array,searchString);
var Row_old = RC.Row;
var Column_old = RC.Column;
Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);
//compare
var diff_index =JSON.parse(cache.get('diff_index'));
var row_ind =JSON.parse(cache.get('row_ind'));
var Roww =JSON.parse(cache.get('Row'));
if (diff_index==null){var diff_index = [];}
if (row_ind==null){var row_ind = 0;}
if (Roww==null){var Roww = Row;}
Logger.log(row_ind+'\n'+Roww);
for (var i=Roww;i<array.length;i++){
var currTime = (new Date()).getTime();
if(currTime - startTime >= MAX_RUNNING_TIME){
Logger.log((currTime - startTime)/(1000*60));
Logger.log(i+'::'+row_ind);
cache.putAll({'diff_index': JSON.stringify(diff_index),'row_ind': JSON.stringify(row_ind),'Row': JSON.stringify(i-1)},21600);
ScriptApp.newTrigger('getNewData').timeBased().after(2 * 60 * 1000).create();
return;
} else {
Logger.log(i);
var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
if (existe==-1){
Logger.log(row_ind+'!!!');
diff_index[row_ind]=i;
row_ind++;
}
}
}
cache.putAll({'diff_index': JSON.stringify(diff_index),'Row': JSON.stringify(Row),'compared': JSON.stringify(1)},21600);
} else {
Logger.log('file not downloaded yet or already compared');
}
}
function getColumn(array,searchString){
for (var i = 0; i < array.length; i++) {
for (var j = 0; j < array[i].length; j++) {
if (array[i][j] == searchString) {
var Row = i+1;
var Column = j;
break;
}
}
if (Row != undefined){
break;
}
}
return {Row: Row, Column: Column};
}
答案 3 :(得分:0)
这是一个可以绕过时间限制的替代解决方案。创建新的专用电子表格以及自定义侧边栏。侧边栏将要求您创建一些HTML,最终将在客户端的iframe中嵌入和呈现。您可以通过脚本标记将纯JavaScript嵌入到HTML中。
这种方法的优点在于这些脚本不会在服务器端运行,而是在客户端上独立于Google Apps脚本的服务器端环境运行,并且不受6分钟限制。此外,他们还可以在您的Google脚本中调用功能。因此,一种方法是让客户端脚本调用Google Script函数来检索必需的数据,在客户端脚本中执行所有繁重的处理,然后将结果发送回服务器端脚本以更新片材。
以下是设置自定义侧边栏的链接,以帮助您入门: https://developers.google.com/apps-script/guides/dialogs#custom_sidebars