复杂的 Elasticsearch 查询

时间:2020-06-25 02:21:18

标签: elasticsearch kibana

Elasticsearch 索引中有以下文档。

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

// Splits `text` on ',' and converts each field to an int with std::stoi.
//
// A string without any comma is treated as a single field. std::stoi parses
// the leading numeric prefix of a field, so "12abc" yields 12.
//
// Throws std::invalid_argument when a field has no numeric prefix (this
// includes empty fields such as those in "1,,2" or a trailing comma), and
// std::out_of_range when a value does not fit in an int.
static std::vector<int> parse_comma_separated_ints(const std::string& text) {
    std::vector<int> values;
    std::size_t field_start = 0;

    while (true) {
        // Keep positions as std::size_t so that std::string::npos is never
        // narrowed to int, and search for each comma only once.
        const std::size_t comma = text.find(',', field_start);
        const std::size_t field_length =
            (comma == std::string::npos) ? std::string::npos : comma - field_start;

        values.push_back(std::stoi(text.substr(field_start, field_length)));

        if (comma == std::string::npos) {
            break;  // that was the last field
        }
        field_start = comma + 1;
    }

    return values;
}

// Reads one whitespace-delimited token from stdin and parses it as a
// comma-separated list of integers (e.g. "1,2,3").
//
// Returns 0 on success and 1 when no token could be read or a field could
// not be converted; a diagnostic is written to stderr in the failure cases.
int main() {
    std::string line;
    if (!(std::cin >> line)) {
        // Without this check an empty token would reach std::stoi and the
        // resulting exception would terminate the program.
        std::cerr << "error: expected a comma-separated list of integers on stdin\n";
        return 1;
    }

    std::vector<int> nums;
    try {
        nums = parse_comma_separated_ints(line);
    } catch (const std::invalid_argument&) {
        std::cerr << "error: input contains a field that is not an integer\n";
        return 1;
    } catch (const std::out_of_range&) {
        std::cerr << "error: input contains an integer that does not fit in int\n";
        return 1;
    }

    // `nums` now holds the parsed values; the snippet does nothing further with them.
    return 0;
}

我希望查询返回空结果 []。可以看到,这两个文档的 uid 相同。我正在使用以下查询来获取结果:

[{
        "_index": "ten2",
        "_type": "documents",
        "_id": "c323c2244a4a4c22_en-us",
        "_source": {
            "publish_details": [{
                    "environment": "603fe91adbdcff66",
                    "time": "2020-06-24T13:36:55.514Z",
                    "locale": "hi-in",
                    "user": "aadab2f531206e9d",
                    "version": 1
                },
                {
                    "environment": "603fe91adbdcff66",
                    "time": "2020-06-24T13:36:55.514Z",
                    "locale": "en-us",
                    "user": "aadab2f531206e9d",
                    "version": 1
                }
            ],
            "created_at": "2020-06-24T13:36:43.037Z",
            "_in_progress": false,
            "title": "Entry 1",
            "locale": "en-us",
            "url": "/entry-1",
            "tags": [],
            "uid": "c323c2244a4a4c22",
            "updated_at": "2020-06-24T13:36:43.037Z",
            "fields": []
        }
    },
    {
        "_index": "ten2",
        "_type": "documents",
        "_id": "c323c2244a4a4c22_mr-in",
        "_source": {
            "publish_details": [{
                "environment": "603fe91adbdcff66",
                "time": "2020-06-24T13:37:26.205Z",
                "locale": "mr-in",
                "user": "aadab2f531206e9d",
                "version": 1
            }],
            "created_at": "2020-06-24T13:36:43.037Z",
            "_in_progress": false,
            "title": "Entry 1 marathi",
            "locale": "mr-in",
            "url": "/entry-1",
            "tags": [],
            "uid": "c323c2244a4a4c22",
            "updated_at": "2020-06-24T13:37:20.092Z",
            "fields": []
        }
    }
]

但是上面的查询返回了全部 2 个文档,而我希望得到空结果。原因是这两个文档的 uid 相同,并且该 uid 的 publish_details 中包含了全部三个 locale。那么怎样才能得到正确的结果?是否有聚合查询可以帮到我?这只是一个示例,我有很多文档需要这样过滤。请帮帮我。

1 个答案:

答案 0 :(得分:1)

{
  "aggs": {
    "agg1": {
      "terms": {
        "field": "uid.raw"
      },
      "aggs": {
        "agg2": {
          "nested": {
            "path": "publish_details"
          },
          "aggs": {
            "locales": {
              "terms": {
                "field": "publish_details.locale"
              }
            }
          }
        }
      }
    }
  }
}

此查询会先按 uid、再按 publish_details.locale 对文档进行分组。

它提供如下结果

"aggregations": {
        "agg1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "c323c2244a4a4c22",
                    "doc_count": 2,
                    "agg2": {
                        "doc_count": 3,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                },
                                {
                                    "key": "mr-in",
                                    "doc_count": 1
                                }
                            ]
                        }
                    }
                },
                {
                    "key": "c323c2244rrffa4a4c22",
                    "doc_count": 1,
                    "agg2": {
                        "doc_count": 2,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                }
                            ]
                        }
                    }
                }
            ]

我有3个文档,其中两个具有相同的ID,而另一个则不同。

我会进一步更新查询,以排除第一个结果(即包含 3 个 locale 存储桶的那个 uid)。您也可以在自己的代码中完成这一步处理。

这是可行的。对于 1 万个文档来说没有问题;但当数据量达到数百万时,您需要有足够的资源来执行此操作。

{
  "size" : 0,
  "query":{
      "bool" :{
          "must_not":{
              "match":{
                "publish_details.environment":"603fe91adbdcff66"
              }
          }
      }
  },
  "aggs": {
    "uids": {
      "terms": {
        "field": "uid.raw"
      },
      "aggs": {
        "details": {
          "nested": {
            "path": "publish_details"
          },
          "aggs": {
            "locales": {
              "terms": {
                "field": "publish_details.locale"
              }
            },   
            "unique_locales": {
                "value_count": {
                    "field": "publish_details.locale"
                }
            }
          }
        }
      }
    }
  }
}

结果:

"aggregations": {
        "uids": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "c323c2244a4a4c22",
                    "doc_count": 2,
                    "details": {
                        "doc_count": 3,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                },
                                {
                                    "key": "mr-in",
                                    "doc_count": 1
                                }
                            ]
                        },
                        "unique_locales": {
                            "value": 3
                        }
                    }
                },
                {
                    "key": "c323c2244rrffa4a4c22",
                    "doc_count": 1,
                    "details": {
                        "doc_count": 2,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                }
                            ]
                        },
                        "unique_locales": {
                            "value": 2
                        }
                    }
                }
            ]