在elasticsearch聚合查询中消除重复文档

时间:2018-04-11 14:03:20

标签: node.js elasticsearch elastic-stack

我有 5 个已索引的文档,每个文档都包含一个 topics(主题)数组,其中 2 个文档的数组内容完全相同,如下所示:

       {
    "_index": "testindex",
    "_type": "testindex",
    "_id": "125",
    "_score": 1,
    "_source": {
        "topics": [
            "Love",
            "Entertainment"
        ]
    }
},
{
    "_index": "testindex",
    "_type": "testindex",
    "_id": "126",
    "_score": 1,
    "_source": {
        "topics": [
            "Health",
            "Fitness"
        ]
    }
},
{
    "_index": "testindex",
    "_type": "testindex",
    "_id": "127",
    "_score": 1,
    "_source": {
        "topics": [
            "Health",
            "Fitness"
        ]
    }
},
{
    "_index": "testindex",
    "_type": "testindex",
    "_id": "128",
    "_score": 1,
    "_source": {
        "topics": [
            "Sports",
            "Cricket"
        ],
    }
},
{
    "_index": "testindex",
    "_type": "testindex",
    "_id": "129",
    "_score": 1,
    "_source": {
        "topics": [
            "Fit",
            "gym"
        ],
    }
}

这是我对Elasticsearch的查询

GET testindex/_search

  {
      "size":0,
      "aggs": {
        "topics": {
          "terms": {
            "field": "topics.keyword",
            "size": 10
          },
          "aggs": {
            "testindex": {
              "top_hits": {
                "size": 10
              }
            }
          }
        }
      }
    }

我的查询结果是

    "aggregations": {
    "topics": {
      "buckets": [
        {
            "key": "health",
            "doc_count": 2,
            "testindex": {
                "hits": {
                    "total": 2,
                    "max_score": 6.3823323,
                    "hits": [
                        {
                            "_index": "testindex",
                            "_type": "testindex",
                            "_id": "126",
                            "_score": 1,
                            "_source": {
                                "topics": [
                                    "Health",
                                ]
                            }
                        },
                        {
                            "_index": "testindex",
                            "_type": "testindex",
                            "_id": "127",
                            "_score": 1,
                            "_source": {
                                "topics": [
                                    "Health"
                                ]
                            }
                        },
                    ]
                }
            }
        },
        {
            "key": "Fitness",
            "doc_count": 2,
            "testindex": {
                "hits": {
                    "total": 2,
                    "max_score": 6.3823323,
                    "hits": [
                        {
                            "_index": "testindex",
                            "_type": "testindex",
                            "_id": "126",
                            "_score": 1,
                            "_source": {
                                "topics": [
                                    "Fitness",
                                ]
                            }
                        },
                        {
                            "_index": "testindex",
                            "_type": "testindex",
                            "_id": "127",
                            "_score": 1,
                            "_source": {
                                "topics": [
                                    "Fitness"
                                ]
                            }
                        },
                    ]
                }
            }
        },
    ]
}
}

其实我不希望同一个文档在不同桶的 top_hits 结果中重复出现。现在 Health 和 Fitness 两个主题的桶分别返回了文档 ID 相同的结果(126 和 127),而我只希望这些文档出现在其中一个桶里(Fitness 或 Health 任选其一)。提前致谢。

0 个答案:

没有答案