Elasticsearch:如何改进重复结果的去除

时间:2016-10-27 10:55:31

标签: asp.net elasticsearch duplicates json.net nest

我用下面的查询按每个索引值(词项)去除重复项:它先按 name 字段聚合,然后为每个键(key)取排在最前面的第一条结果。有没有办法改写这个查询,让它在所有键之间进行比较并去除重复项,而不必在 ASP.NET 中手动处理?(例如直接得到一个不含重复项的结果列表?)

使用的查询:

{
  "size": 0,
  "query": {
    "multi_match": {
      "query": "laura",
      "operator": "OR",
      "fields": ["name"]
    }
  },
  "aggs": {
    "query": {
      "terms": { "field": "name" },
      "aggs": {
        "top": {
          "top_hits": { "size": 1 }
        }
      }
    }
  }
}

检索结果:

        "aggregations": {
            "query": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 3,
              "buckets": [
                {
                  "key": "laura",
                  "doc_count": 15,
                  "top": {
                    "hits": {
                      "total": 15,
                      "max_score": 4.240788,
                      "hits": [
                        {
        "eventCode": 1011020,
        "name": "LAURA MVULA", /* Not duplicated*/
        ...
        },
        {
                  "key": "mvula",
                  "doc_count": 11,
                  "top": {
                    "hits": {
                      "total": 11,
                      "max_score": 4.240788,
                      "hits": [
                        {
        "eventCode": 1011020,
        "name": "LAURA MVULA", /* Duplicate!*/
        ...
        },
        {
                  "key": "lexx",
                  "doc_count": 2,
                  "top": {
                    "hits": {
                      "total": 2,
                      "max_score": 3.3926303,
                      "hits": [
                        {
        "eventCode": 1009639,
        "name": "LAURA LEXX: TYRANNOSAURUS LEXX", /* Not duplicated*/
        ...
        }

使用的设置:

    ...
                  "name": {
                    "type": "string",
                    "fields": {
                      "raw": {
                        "type": "string",
                        "index": "not_analyzed"
                      }
                    }
                  },
    ...

使用的映射:

...
              "name": {
                "type": "string",
                "fields": {
                  "raw": {
                    "type": "string",
                    "index": "not_analyzed"
                  }
                }
              },
...

0 个答案:

没有答案