ElasticSearch:汇总一组收集的结果

时间:2019-03-04 21:32:38

标签: elasticsearch elasticsearch-aggregation

假设我有一组汉堡...

对于每个汉堡,我都有一组与汉堡各个组成部分相关的图像。

不幸的是,这些组件的结构没有任何一致性(这不是我写的)。

这是两个文档的示例:

{
    "bunsResource": {
        "image": {
            "url": "./buns_1.png",
            "who": "Sam"
        },
        "buns": [
            {
                "image": {
                    "url": "./top-bun_1.png",
                    "who": "Jim"
                }
            },
            {
                "image": {
                    "url": "./bottom-bun_1.png",
                    "who": "Sarah"
                }
            }
        ]
    },
    "pattyResource": {
        "image": {
            "url": "./patties_1.png",
            "who": "Kathy"
        },
        "patties": [
            {
                "image": {
                    "url": "./patty_1.jpg",
                    "who": "Kathy"
                }
            }
        ]
    }
},
{
    "bunsResource": {
        "image": {
            "url": "./buns_2.png",
            "who": "Jim"
        },
        "buns": [
            {
                "image": {
                    "url": "./top-bun_2.png",
                    "who": "Jim"
                }
            },
            {
                "image": {
                    "url": "./bottom-bun_2.png",
                    "who": "Kathy"
                }
            }
        ]
    },
    "pattyResource": {
        "image": {
            "url": "./patties_1.png",
            "who": "Kathy"
        },
        "patties": [
            {
                "image": {
                    "url": "./patty_1.jpg",
                    "who": "Kathy"
                }
            }
        ]
    }
}

我需要的是一组photographer / image count

{
    "who": "Sam",
    "count": 1
},
{
    "who": "Jim",
    "count": 3
},
{
    "who": "Sarah",
    "count": 2
},
{
    "who": "Kathy",
    "count": 2
}

请注意,这是唯一图片的计数!

我无法弄清楚该如何实现...

我假设我需要首先将每个汉堡解析为一组唯一的url / who,然后从那里进行汇总,但是我不知道如何为每个汉堡获取扁平的url / who列表。

1 个答案:

答案 0 :(得分:5)

这取决于patties和buns数组是否为nested类型。如果不是,那么很容易,您可以使用脚本运行terms聚合,该脚本从文档的各处收集所有who字段:

POST not-nested/_search 
{
  "size": 0,
  "aggs": {
    "script": {
      "terms": {
        "script": {
          "source": """
          // Collect every photographer name ("who") found in this document into a single list.
          def list = new ArrayList();
          // Top-level image of each resource.
          list.addAll(doc['pattyResource.image.who.keyword'].values);
          list.addAll(doc['bunsResource.image.who.keyword'].values);
          // Images attached to the individual entries of the buns / patties arrays.
          list.addAll(doc['bunsResource.buns.image.who.keyword'].values);
          list.addAll(doc['pattyResource.patties.image.who.keyword'].values);
          // The returned list is the script's result for the enclosing terms aggregation.
          return list;
          """
        }
      }
    }
  }
}

这将返回以下内容:

  "aggregations" : {
    "script" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Jim",
          "doc_count" : 2
        },
        {
          "key" : "Kathy",
          "doc_count" : 2
        },
        {
          "key" : "Sam",
          "doc_count" : 1
        },
        {
          "key" : "Sarah",
          "doc_count" : 1
        }
      ]
    }
  }

但是,如果嵌套的话,事情会变得更加复杂,因为您需要进行一些客户端工作来计算最终数量,但是我们可以通过一些聚合来简化客户端工作:

POST nested/_search 
{
  "size": 0,
  "aggs": {
    "bunsWho": {
      "terms": {
        "field": "bunsResource.image.who.keyword"
      }
    },
    "bunsWhoNested": {
      "nested": {
        "path": "bunsResource.buns"
      },
      "aggs": {
        "who": {
          "terms": {
            "field": "bunsResource.buns.image.who.keyword"
          }
        }
      }
    },
    "pattiesWho": {
      "terms": {
        "field": "pattyResource.image.who.keyword"
      }
    },
    "pattiesWhoNested": {
      "nested": {
        "path": "pattyResource.patties"
      },
      "aggs": {
        "who": {
          "terms": {
            "field": "pattyResource.patties.image.who.keyword"
          }
        }
      }
    }
  }
}

这将返回以下内容:

  "aggregations" : {
    "pattiesWho" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Kathy",
          "doc_count" : 2
        }
      ]
    },
    "bunsWhoNested" : {
      "doc_count" : 4,
      "who" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Jim",
            "doc_count" : 2
          },
          {
            "key" : "Kathy",
            "doc_count" : 1
          },
          {
            "key" : "Sarah",
            "doc_count" : 1
          }
        ]
      }
    },
    "pattiesWhoNested" : {
      "doc_count" : 2,
      "who" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Kathy",
            "doc_count" : 2
          }
        ]
      }
    },
    "bunsWho" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Jim",
          "doc_count" : 1
        },
        {
          "key" : "Sam",
          "doc_count" : 1
        }
      ]
    }
  }

然后您可以简单地创建一些客户端逻辑(此处为Node.js中的一些示例代码)来将数字相加:

// Running total per photographer, keyed by the bucket key (the `who` value).
const whos = {};

/**
 * Adds `count` to the running total stored for `who`,
 * starting from 0 the first time a name is seen.
 */
const recordWho = (who, count) => {
    const previous = whos[who] || 0;
    whos[who] = previous + count;
};

// Bucket arrays of the four terms aggregations, in the order they are folded
// into the totals: patties (top-level, nested), then buns (top-level, nested).
const aggs = resp.aggregations;
const bucketLists = [
    aggs.pattiesWho.buckets,
    aggs.pattiesWhoNested.who.buckets,
    aggs.bunsWho.buckets,
    aggs.bunsWhoNested.who.buckets,
];

for (const buckets of bucketLists) {
    for (const bucket of buckets) {
        recordWho(bucket.key, bucket.doc_count);
    }
}

console.log(whos);

=>

{ Kathy: 5, Jim: 3, Sam: 1, Sarah: 1 }