Google电子表格查找重复项并删除所有

时间:2014-02-17 19:38:57

标签: duplicates google-sheets delete-row

我有一个包含重复行的电子表格。需要删除所有重复的行。

示例:

Superman    superhero
Batman      superhero
Apple       fruit
Superman    superhero
Batman      superhero

运行脚本后,我只想保留:

Apple       fruit

我找到了这个脚本......也许是一个不错的起点?

/**
 * Collapses repeated rows of the active sheet so that every distinct row
 * appears exactly once, keeping the first occurrence and the original order.
 *
 * Rows are compared by their comma-joined text (Array.prototype.join), so two
 * rows whose cells join to the same string are treated as equal. The sheet's
 * contents are cleared and the surviving rows are written back starting at
 * row 1, column 1.
 */
function removeDuplicateRows() {
  var sheet = SpreadsheetApp.getActiveSheet();
  var data = sheet.getDataRange().getValues();

  // Nothing to rewrite; also avoids indexing newData[0] on an empty result.
  if (data.length === 0) {
    return;
  }

  // Prototype-less lookup so keys such as "constructor" or "__proto__"
  // behave like any other row text.
  var seen = Object.create(null);
  var newData = [];

  // Index loop with a declared counter: for...in on an array yields string
  // keys and the undeclared counter leaked into the global scope.
  for (var i = 0; i < data.length; i++) {
    var row = data[i];
    var key = row.join();
    if (!(key in seen)) {
      seen[key] = true;
      newData.push(row);
    }
  }

  sheet.clearContents();
  sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}

3 个答案:

答案 0 :(得分:0)

以下是对另一个论坛上this post中提出的一种略有不同的算法的修改版本。需要注意的是,它在删除重复行的同时也会对您的数据进行排序(不过这一点是可以想办法规避的)。

/**
 * Sorts the rows of the active sheet and removes every row that occurs more
 * than once, so only rows that were unique remain (in sorted order).
 *
 * Rows are compared by their comma-joined text. Set `headerRow` to true to
 * keep the first row in place as a header instead of sorting/deduplicating it.
 * The sheet's contents are cleared and the result is written back starting at
 * row 1, column 1; if nothing survives, the sheet is simply left cleared.
 */
function removeDuplicatesAndSort()
{
  var headerRow = false;

  var sheet = SpreadsheetApp.getActiveSheet();
  var data = sheet.getDataRange().getValues();

  // splice returns the removed rows as an array (empty if there were none).
  var header = headerRow ? data.splice(0, 1) : [];

  // The default sort orders rows by their string form, which is the same
  // comma-joined text used for comparison below, so equal rows end up adjacent.
  data.sort();

  var kept = [];
  var start = 0;
  while (start < data.length)
  {
    var key = data[start].join(",");

    // Find the end of the run of rows equal to the one at `start`.
    var end = start + 1;
    while (end < data.length && data[end].join(",") === key)
    {
      end++;
    }

    // A run of length one is a unique row; longer runs are dropped entirely.
    if (end - start === 1)
    {
      kept.push(data[start]);
    }
    start = end;
  }

  var output = header.concat(kept);
  sheet.clearContents();

  // When every row was duplicated there is nothing to write back; indexing
  // output[0] here would throw.
  if (output.length === 0)
  {
    return;
  }
  sheet.getRange(1, 1, output.length, output[0].length).setValues(output);
}

答案 1 :(得分:0)

问题在于:当前行一旦被遍历到,只要它还不在newData中就会被添加进去,因此每组重复行总会保留一份副本。我修改了第二个循环,在把当前行添加到newData之前,先检查它是否还出现在工作表中的其他位置。

此解决方案不会对您的数据进行排序,但会将剩余的行移至工作表的顶部。如果您希望它们留在原处,请查看Harold's answer。

我写了一些注释来帮助你理解。

/**
 * Removes every row of the active sheet that occurs more than once, keeping
 * only rows that were unique, in their original relative order.
 *
 * Rows are compared by their comma-joined text. The sheet's contents are
 * cleared and the surviving rows are written back starting at row 1,
 * column 1 (so they move to the top of the sheet). If no row is unique the
 * sheet is simply left cleared.
 */
function removeDuplicates() {
  var sheet = SpreadsheetApp.getActiveSheet();
  var data = sheet.getDataRange().getValues();

  // First pass: count how often each row text occurs. Everything needed is
  // already in `data`, so the sheet is not re-read row by row.
  // Prototype-less table so row text like "constructor" is counted normally.
  var occurrences = Object.create(null);
  var keys = [];
  var r;
  for (r = 0; r < data.length; r++) {
    var key = data[r].join();
    keys.push(key);
    occurrences[key] = (occurrences[key] || 0) + 1;
  }

  // Second pass: a row is kept only when its text was seen exactly once
  // (the row itself), i.e. it has no duplicate anywhere in the data.
  var newData = [];
  for (r = 0; r < data.length; r++) {
    if (occurrences[keys[r]] === 1) {
      newData.push(data[r]);
    }
  }

  sheet.clearContents();

  // Every row had a duplicate: nothing to write, and newData[0] would throw.
  if (newData.length === 0) {
    return;
  }
  sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}

答案 2 :(得分:0)

不确定这是否是您想要的,但这里有一个小脚本,它执行一个非常具体的操作:在每一列内部(而不是跨列)清除所有出现重复的值,只保留不重复的值。

/**
 * Blanks out, column by column, every cell whose value occurs more than once
 * within that same column of the active sheet's data range. Values that are
 * unique within their column are left untouched; rows are not moved.
 *
 * Values are compared by their string form (they are used as object keys).
 * The modified grid is written back over the same data range.
 */
function eradicateDuplicate(){
  var range = SpreadsheetApp.getActiveSheet().getDataRange();
  var data = range.getValues();

  // counts[j] maps a value's string form to its number of occurrences in
  // column j. Prototype-less tables are used so values such as "constructor"
  // or "toString" are not confused with inherited Object properties.
  var counts = [];
  var i, j, value;

  for (i = 0; i < data.length; i++) {
    for (j = 0; j < data[i].length; j++) {
      if (!counts[j]) {
        counts[j] = Object.create(null);
      }
      value = data[i][j];
      counts[j][value] = (counts[j][value] || 0) + 1;
    }
  }

  // Second pass: clear every cell whose value was seen more than once in
  // its column, including the first occurrence.
  for (i = 0; i < data.length; i++) {
    for (j = 0; j < data[i].length; j++) {
      if (counts[j][data[i][j]] > 1) {
        data[i][j] = "";
      }
    }
  }

  range.setValues(data);
}