我有一个包含重复行的电子表格。需要删除所有重复的行。
示例:
Superman superhero
Batman superhero
Apple fruit
Superman superhero
Batman superhero
运行脚本后,我希望只剩下:
Apple fruit
我找到了下面这个脚本……也许可以作为一个不错的起点?
/**
 * Removes repeated rows from the active sheet, keeping the first occurrence
 * of every distinct row and writing the survivors back starting at cell A1.
 *
 * Two rows are treated as equal when each pair of cells has the same string
 * form, so the number 1 and the text "1" compare equal.
 */
function removeDuplicateRows() {
  var sheet = SpreadsheetApp.getActiveSheet();
  var data = sheet.getDataRange().getValues();
  if (data.length === 0) {
    return; // nothing to deduplicate, and data[0] would be undefined below
  }

  // Prototype-free object used as a set of the row keys already kept, so a
  // cell value such as "constructor" cannot collide with an inherited member.
  var seen = Object.create(null);
  var newData = [];
  for (var i = 0; i < data.length; i++) {
    var row = data[i];
    // JSON-encoding the stringified cells keeps cell boundaries intact;
    // a plain join() would confuse ["a,b", "c"] with ["a", "b,c"].
    var key = JSON.stringify(row.map(String));
    if (!seen[key]) {
      seen[key] = true;
      newData.push(row);
    }
  }

  sheet.clearContents();
  sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
答案 0 :(得分:0)
以下脚本改编自另一个论坛上的一篇帖子(this post)中提出的一种略有不同的算法。需要注意的是,它在删除重复行的同时还会对您的数据进行排序(不过这一点可以通过修改脚本来避免)。
/**
 * Sorts the rows of the active sheet and removes every row that occurs more
 * than once (all copies are dropped, not just the extras). The remaining rows
 * are written back starting at cell A1.
 *
 * Rows are sorted with the default Array.prototype.sort ordering, i.e. by the
 * string form of each row. Two rows are treated as equal when each pair of
 * cells has the same string form.
 */
function removeDuplicatesAndSort()
{
  // When true, the first row is set aside before processing and written
  // back as the first row of the result.
  var headerRow = false;
  var sheet = SpreadsheetApp.getActiveSheet();
  var data = sheet.getDataRange().getValues();
  var header = headerRow ? data.splice(0, 1) : [];

  // JSON-encoding the stringified cells keeps cell boundaries intact;
  // join(",") would confuse ["a,b", "c"] with ["a", "b,c"].
  var keyOf = function (row) {
    return JSON.stringify(row.map(String));
  };

  // Count how often each row occurs. The object has no prototype so cell
  // text can never collide with an inherited property name.
  var counts = Object.create(null);
  for (var i = 0; i < data.length; i++) {
    var key = keyOf(data[i]);
    counts[key] = (counts[key] || 0) + 1;
  }

  data.sort();
  var unique = data.filter(function (row) {
    return counts[keyOf(row)] === 1;
  });
  var output = header.concat(unique);

  sheet.clearContents();
  // If every row was a duplicate there is nothing left to write; asking for
  // a range from output[0] would throw in that case.
  if (output.length > 0) {
    sheet.getRange(1, 1, output.length, output[0].length).setValues(output);
  }
}
答案 1 :(得分:0)
问题在于:循环一遇到当前行,只要 newData 中还没有它,就会把它添加进去,所以每组重复行总会保留一份副本。
我修改了第二个循环:在把当前行添加到 newData 之前,先检查它是否还出现在工作表中的其他位置。
此解决方案不会对您的数据进行排序,但会将剩余的行移至工作表的顶部。如果您希望它们留在原位,请参阅 Harold 的回答(Harold's answer)。
我在代码中写了一些注释来帮助你理解。
/**
 * Removes every row of the active sheet that occurs more than once (all
 * copies are dropped) and moves the remaining rows, in their original order,
 * to the top of the sheet.
 *
 * Two rows are treated as equal when each pair of cells has the same string
 * form, so the number 1 and the text "1" compare equal.
 */
function removeDuplicates() {
  var sheet = SpreadsheetApp.getActiveSheet();
  // Read the whole data range once; all comparisons below work on this
  // in-memory copy instead of going back to the sheet for each row.
  var data = sheet.getDataRange().getValues();

  // One key per row. JSON-encoding the stringified cells keeps cell
  // boundaries intact; join() would confuse ["a,b", "c"] with ["a", "b,c"].
  var keys = data.map(function (row) {
    return JSON.stringify(row.map(String));
  });

  // Count how often each key occurs. The object has no prototype so cell
  // text can never collide with an inherited property name.
  var occurrences = Object.create(null);
  keys.forEach(function (key) {
    occurrences[key] = (occurrences[key] || 0) + 1;
  });

  // Each row matches itself, so a count of exactly 1 means it has no
  // duplicate anywhere else in the data.
  var newData = data.filter(function (row, index) {
    return occurrences[keys[index]] === 1;
  });

  sheet.clearContents();
  // If every row had a duplicate there is nothing left to write; asking for
  // a range from newData[0] would throw in that case.
  if (newData.length > 0) {
    sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
  }
}
答案 2 :(得分:0)
不确定这是否是你想要的,但这里有一个小脚本,它执行一个非常具体的操作:在每一列内部(不跨列比较)清除所有重复出现的值,只保留不重复的值。
/**
 * Blanks out, column by column, every cell of the active sheet's data range
 * whose value occurs more than once in its own column. All copies of a
 * repeated value are replaced with "", and values are never compared across
 * columns. Cells keep their positions; nothing is moved or sorted.
 *
 * Values are compared by their string form, so the number 1 and the text "1"
 * in the same column count as the same value.
 */
function eradicateDuplicate() {
  var sheet = SpreadsheetApp.getActiveSheet();
  var range = sheet.getDataRange();
  var data = range.getValues();
  var countsByColumn = [];
  var r, c, key;

  // First pass: count how often each value occurs in each column.
  for (r = 0; r < data.length; r++) {
    for (c = 0; c < data[r].length; c++) {
      if (!countsByColumn[c]) {
        // No prototype, so cell text such as "constructor" or "toString"
        // is counted like any other value instead of hitting an inherited
        // property.
        countsByColumn[c] = Object.create(null);
      }
      key = String(data[r][c]);
      countsByColumn[c][key] = (countsByColumn[c][key] || 0) + 1;
    }
  }

  // Second pass: clear every cell whose value was seen more than once.
  for (r = 0; r < data.length; r++) {
    for (c = 0; c < data[r].length; c++) {
      if (countsByColumn[c][String(data[r][c])] > 1) {
        data[r][c] = "";
      }
    }
  }

  range.setValues(data);
}