Question

我有一个 Elasticsearch 索引（v6.8），其中的多个文档可能在某个字段上具有相同的值。

[
    { 
        "siren": 123,
        "owner": "A",
        "price": 10
    },
    { 
        "siren": 123,
        "owner": "B",
        "price": 20
    },
    { 
        "siren": 456,
        "owner": "A",
        "price": 10
    },
    { 
        "siren": 456,
        "owner": "C",
        "price": 30
    } 
]

我想获取所有者（owner）为 A 和 B 的所有文档，但要按 siren 字段去重。期望的结果如下（去重后保留的是来自所有者 A 还是 B 的那一条，我并不在意）：

[
    { 
        "siren": 123,
        "owner": "A",
        "price": 10
    },
    { 
        "siren": 456,
        "owner": "A",
        "price": 10
    }
]

此外，我希望聚合也只统计按同一字段（siren）去重之后的文档。

我尝试过如下查询：

{
    "query": {
        "bool": {
            "must": [
                [
                    {
                        "terms": {
                            "owner": [
                                "A",
                                "B"
                            ]
                        }
                    }
                ]
            ]
        }
    },
    "aggs": {
        "by_price": {
            "terms": {
                "field": "price",
                "size": 20
            }
        }
    }
}

但这样会把“相同”的文档重复计数。

Answer 1

您可以在 siren 字段上使用 terms 聚合，并在其中嵌套一个 top_hits 聚合：

 {
  "size":0,
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "owner.keyword": [
              "A",
              "B"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "by_price": {
      "terms": {
        "field": "siren",
        "size": 20
      },
      "aggs": {
        "top_sales_hits": {
          "top_hits": {
            "_source": {
              "includes": [
                "siren",
                "owner",
                "price"
              ]
            },
            "size": 1
          }
        }
      }
    }
  }
}

搜索结果如下：

"aggregations": {
    "by_price": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 123,
          "doc_count": 2,
          "top_sales_hits": {
            "hits": {
              "total": {
                "value": 2,
                "relation": "eq"
              },
              "max_score": 1.0,
              "hits": [
                {
                  "_index": "66226467",
                  "_type": "_doc",
                  "_id": "1",
                  "_score": 1.0,
                  "_source": {
                    "owner": "A",          // note this
                    "siren": 123,
                    "price": 10
                  }
                }
              ]
            }
          }
        },
        {
          "key": 456,
          "doc_count": 1,
          "top_sales_hits": {
            "hits": {
              "total": {
                "value": 1,
                "relation": "eq"
              },
              "max_score": 1.0,
              "hits": [
                {
                  "_index": "66226467",
                  "_type": "_doc",
                  "_id": "3",
                  "_score": 1.0,
                  "_source": {
                    "owner": "A",       // note this
                    "siren": 456,
                    "price": 10
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }

根据字段

1 个答案: