假设我有一组汉堡...
对于每个汉堡,我都有一组与汉堡各个组成部分相关的图像。
不幸的是,这些组件的结构没有任何一致性(这些数据结构不是我设计的)。
这是两个文档的示例:
{
"bunsResource": {
"image": {
"url": "./buns_1.png",
"who": "Sam"
},
"buns": [
{
"image": {
"url": "./top-bun_1.png",
"who": "Jim"
}
},
{
"image": {
"url": "./bottom-bun_1.png",
"who": "Sarah"
}
}
]
},
"pattyResource": {
"image": {
"url": "./patties_1.png",
"who": "Kathy"
},
"patties": [
{
"image": {
"url": "./patty_1.jpg",
"who": "Kathy"
}
}
]
}
},
{
"bunsResource": {
"image": {
"url": "./buns_2.png",
"who": "Jim"
},
"buns": [
{
"image": {
"url": "./top-bun_2.png",
"who": "Jim"
}
},
{
"image": {
"url": "./bottom-bun_2.png",
"who": "Kathy"
}
}
]
},
"pattyResource": {
"image": {
"url": "./patties_1.png",
"who": "Kathy"
},
"patties": [
{
"image": {
"url": "./patty_1.jpg",
"who": "Kathy"
}
}
]
}
}
我需要的是一组 photographer / image count(摄影师 / 图片数量):
{
"who": "Sam",
"count": 1
},
{
"who": "Jim",
"count": 3
},
{
"who": "Sarah",
"count": 2
},
{
"who": "Kathy",
"count": 2
}
请注意,这里统计的是不重复(唯一)图片的数量!
我无法弄清楚该如何实现...
我假设我需要首先将每个 burger 解析为一组唯一的 url / who,然后再在此基础上进行汇总,但是我不知道如何为每个汉堡得到一个扁平的 url / who 列表。
答案 0 :(得分:5)
这取决于 patties 和 buns 数组是否被映射为 nested 类型。如果不是,那么很容易:您可以运行一个带脚本的 terms 聚合,该脚本从文档的各处收集所有 who 字段:
POST not-nested/_search
{
"size": 0,
"aggs": {
"script": {
"terms": {
"script": {
"source": """
// Gather every photographer name ("who") found anywhere in the current document.
def list = new ArrayList();
// Top-level images of the patty resource and the buns resource.
list.addAll(doc['pattyResource.image.who.keyword'].values);
list.addAll(doc['bunsResource.image.who.keyword'].values);
// Images of the individual buns and patties (this variant is for arrays that are not nested).
list.addAll(doc['bunsResource.buns.image.who.keyword'].values);
list.addAll(doc['pattyResource.patties.image.who.keyword'].values);
// The enclosing terms aggregation builds its buckets from the values returned here.
return list;
"""
}
}
}
}
}
这将返回以下内容:
"aggregations" : {
"script" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Jim",
"doc_count" : 2
},
{
"key" : "Kathy",
"doc_count" : 2
},
{
"key" : "Sam",
"doc_count" : 1
},
{
"key" : "Sarah",
"doc_count" : 1
}
]
}
}
但是,如果这些数组是 nested 类型,事情就会复杂一些:最终的数量需要在客户端计算,不过我们可以先用几个聚合来减轻客户端的工作:
POST nested/_search
{
"size": 0,
"aggs": {
"bunsWho": {
"terms": {
"field": "bunsResource.image.who.keyword"
}
},
"bunsWhoNested": {
"nested": {
"path": "bunsResource.buns"
},
"aggs": {
"who": {
"terms": {
"field": "bunsResource.buns.image.who.keyword"
}
}
}
},
"pattiesWho": {
"terms": {
"field": "pattyResource.image.who.keyword"
}
},
"pattiesWhoNested": {
"nested": {
"path": "pattyResource.patties"
},
"aggs": {
"who": {
"terms": {
"field": "pattyResource.patties.image.who.keyword"
}
}
}
}
}
}
这将返回以下内容:
"aggregations" : {
"pattiesWho" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Kathy",
"doc_count" : 2
}
]
},
"bunsWhoNested" : {
"doc_count" : 4,
"who" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Jim",
"doc_count" : 2
},
{
"key" : "Kathy",
"doc_count" : 1
},
{
"key" : "Sarah",
"doc_count" : 1
}
]
}
},
"pattiesWhoNested" : {
"doc_count" : 2,
"who" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Kathy",
"doc_count" : 2
}
]
}
},
"bunsWho" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Jim",
"doc_count" : 1
},
{
"key" : "Sam",
"doc_count" : 1
}
]
}
}
然后您可以简单地创建一些客户端逻辑(此处为Node.js中的一些示例代码)来将数字相加:
// Running total of images per photographer, keyed by the bucket key ("who").
var whos = {};

/**
 * Adds `count` to the running total kept for `who`, starting from 0 the first
 * time a photographer is seen.
 */
var recordWho = function (who, count) {
  var runningTotal = whos[who] || 0;
  whos[who] = runningTotal + count;
};

// Plain terms aggregations expose `buckets` directly, while the nested ones
// wrap them in the `who` sub-aggregation. Each selector is evaluated only when
// its turn comes, in the same order as the four aggregations are summed.
[
  function (aggs) { return aggs.pattiesWho.buckets; },
  function (aggs) { return aggs.pattiesWhoNested.who.buckets; },
  function (aggs) { return aggs.bunsWho.buckets; },
  function (aggs) { return aggs.bunsWhoNested.who.buckets; }
].forEach(function (selectBuckets) {
  selectBuckets(resp.aggregations).forEach(function (bucket) {
    recordWho(bucket.key, bucket.doc_count);
  });
});

console.log(whos);
=>
{ Kathy: 5, Jim: 3, Sam: 1, Sarah: 1 }