我有一列包含一堆成分清单的列。我试图弄清楚不同成分出现了多少次。有73,000行。此question的答案适用于Google表格中的少量数据。
公式为=UNIQUE(TRANSPOSE(SPLIT(JOIN(", ";A2:A);", ";FALSE)))
但是我在这里JOIN
超过了50000个字符。还有另一种解决方法吗?
表格:https://docs.google.com/spreadsheets/d/1t0P9hMmVpwhI2IbATmIMjobuALTg8VWhl8-AQaq3zIo/edit?usp=sharing
答案 0 :(得分:1)
=UNIQUE(TRANSPOSE(SPLIT(REGEXREPLACE(TRANSPOSE(
QUERY(ARRAYFORMULA(","&A1:A),,5000000))," ,",","),",")))
答案 1 :(得分:1)
但也许您需要这个(?):
=QUERY(TRANSPOSE(SPLIT(REGEXREPLACE(TRANSPOSE(
QUERY(ARRAYFORMULA(","&A1:A),,5000000))," ,",","),",")),
"select Col1,count(Col1)
where Col1 is not null
group by Col1
label count(Col1)''")
答案 2 :(得分:1)
=ARRAYFORMULA(UNIQUE(TRIM(TRANSPOSE(SPLIT(TRANSPOSE(
QUERY(","&A1:A,,5000000)),",")))))
=QUERY(QUERY(ARRAYFORMULA(TRIM(TRANSPOSE(SPLIT(TRANSPOSE(
QUERY(","&A1:A,,5000000)),",")))),
"select Col1,count(Col1)
where Col1 is not null
group by Col1
label count(Col1)''"),
"order by Col2 desc")
答案 3 :(得分:1)
我做了一个google脚本解决方案,因为我想玩关键地图对。
function myFunction() {
var myMap = {"candy":0};
var sh = SpreadsheetApp.getActiveSpreadsheet();
var ss = sh.getSheetByName("FIRSTSHEETNAME");
var os = sh.getSheetByName("Ingredients");
var data = ss.getDataRange().getValues();
for (var i=0; i<data.length;i++)//full
//for (var i=1; i<4000;i++)//test
{
var array = data[i][0].split( ",");
for (var j=0; j<array.length;j++)
{
var item = array[j];
//Logger.log(array[j]);
if (myMap[item]>-1){
//Logger.log("REPEAT INGREDIENT");
var num = parseInt(myMap[item]);
num++;
myMap[item]=num;
//Logger.log(item +" "+num);
} else {
myMap[item]=1;
//Logger.log("New Ingredient: "+item);
//Logger.log(myMap);
}
}
}
//Logger.log(myMap);
var output=[];
for (var key in myMap){
//Logger.log("Ack");
output.push([key,myMap[key]]);
}
//Logger.log(output);
os.getRange(2,1,output.length,output[0].length).setValues(output);
}
您需要为输出添加一个“成分”选项卡,并将第一个选项卡更改为FIRSTSHEETNAME(或更改代码)。在我的测试中,4个项目花费了4秒,400个项目花费了5秒,而4000个项目花费了6秒。前导空格可能有问题,但这为您提供了一个起点。