在带查询条件的请求中使用 top_hits 聚合

时间:2019-03-25 11:20:55

标签: java elasticsearch

当我运行以下查询时,用于在特定日期范围内按creator_cust_id进行分组:

{  
   "from":0,
   "size":0,
   "query":{  
      "bool":{  
         "must":[  
            {  
               "range":{  
                  "lead_created_time":{  
                     "from":"2019-02-01 00:00:00",
                     "to":"2019-03-01 00:00:00",
                     "include_lower":true,
                     "include_upper":true,
                     "format":"yyyy-MM-dd HH:mm:ss",
                     "boost":1.0
                  }
               }
            }
         ],
         "adjust_pure_negative":true,
         "boost":1.0
      }
   },
   "aggregations":{  
      "creator_cust_id":{  
         "terms":{  
            "field":"creator_cust_id.keyword",
            "size":1000,
            "min_doc_count":1,
            "shard_min_doc_count":0,
            "show_term_doc_count_error":false,
            "order":[  
               {  
                  "_count":"desc"
               },
               {  
                  "_key":"asc"
               }
            ]
         },
         "aggregations":{  
            "data_fields":{  
               "top_hits":{  
                  "from":0,
                  "size":1,
                  "version":false,
                  "explain":false,
                  "_source":{  
                     "includes":[  
                        "creator.agent_name",
                        "creator.agent_team"
                     ],
                     "excludes":[  

                     ]
                  }
               }
            }
         }
      }
   }
}

我得到了包含所有所需数据(agent_name 和 agent_team)的正确结果:

{
    "took": 6,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 288,
        "max_score": 0,
        "hits": []
    },
    "aggregations": {
        "creator_cust_id": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "1000013335",
                    "doc_count": 38,
                    "data_fields": {
                        "hits": {
                            "total": 38,
                            "max_score": 1,
                            "hits": [
                                {
                                    "_index": "oereporting",
                                    "_type": "doc",
                                    "_id": "a27dce83-9141-4fac-8a59-787e6ee9aaa5",
                                    "_score": 1,
                                    "_source": {
                                        "creator": [
                                            {
                                                "agent_team": "Self Signup",
                                                "agent_name": "Aashit Sharma"
                                            }
                                        ]
                                    }
                                }
                            ]
                        }
                    }
                },
                {
                    "key": "1107221321",
                    "doc_count": 16,
                    "data_fields": {
                        "hits": {
                            "total": 16,
                            "max_score": 1,
                            "hits": [
                                {
                                    "_index": "oereporting",
                                    "_type": "doc",
                                    "_id": "3b5e21e8-7283-49a4-a4d9-88105ce2b03f",
                                    "_score": 1,
                                    "_source": {
                                        "creator": [
                                            {
                                                "agent_team": "KAM",
                                                "agent_name": "MAYANK KUMAR"
                                            }
                                        ]
                                    }
                                }
                            ]
                        }
                    }
                }
            ]
        }
    }
}

但是,当我添加 match 查询以获取特定的 creator_cust_id 时,在一种情况下(1000013335)我获得了正确的数据,而在另一种情况下(1107221321)却没有获得所需数据。

以下是带有结果的各个查询:

获取所需数据

{
  "from": 0,
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "lead_created_time": {
              "from": "2019-02-01 00:00:00",
              "to": "2019-03-01 00:00:00",
              "include_lower": true,
              "include_upper": true,
              "format": "yyyy-MM-dd HH:mm:ss",
              "boost": 1
            }
          }
        },
        {
          "match": {
            "creator_cust_id": {
              "query": "1000013335",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": true,
              "lenient": false,
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "creator_cust_id": {
      "terms": {
        "field": "creator_cust_id.keyword",
        "size": 1000,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "data_fields": {
          "top_hits": {
            "from": 0,
            "size": 1,
            "version": false,
            "explain": false,
            "_source": {
              "includes": [
                "creator.agent_name",
                "creator.agent_team"
              ],
              "excludes": []
            }
          }
        }
      }
    }
  }
}

结果

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 38,
        "max_score": 0,
        "hits": []
    },
    "aggregations": {
        "creator_cust_id": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "1000013335",
                    "doc_count": 38,
                    "data_fields": {
                        "hits": {
                            "total": 38,
                            "max_score": 4.0910425,
                            "hits": [
                                {
                                    "_index": "oereporting",
                                    "_type": "doc",
                                    "_id": "a27dce83-9141-4fac-8a59-787e6ee9aaa5",
                                    "_score": 4.0910425,
                                    "_source": {
                                        "creator": [
                                            {
                                                "agent_team": "Self Signup",
                                                "agent_name": "Aashit Sharma"
                                            }
                                        ]
                                    }
                                }
                            ]
                        }
                    }
                }
            ]
        }
    }
}

未获取所需数据

{
  "from": 0,
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "lead_created_time": {
              "from": "2019-02-01 00:00:00",
              "to": "2019-03-01 00:00:00",
              "include_lower": true,
              "include_upper": true,
              "format": "yyyy-MM-dd HH:mm:ss",
              "boost": 1
            }
          }
        },
        {
          "match": {
            "creator_cust_id": {
              "query": "1107221321",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": true,
              "lenient": false,
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "creator_cust_id": {
      "terms": {
        "field": "creator_cust_id.keyword",
        "size": 1000,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "data_fields": {
          "top_hits": {
            "from": 0,
            "size": 1,
            "version": false,
            "explain": false,
            "_source": {
              "includes": [
                "creator.agent_name",
                "creator.agent_team"
              ],
              "excludes": []
            }
          }
        }
      }
    }
  }
}

结果

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 16,
        "max_score": 0,
        "hits": []
    },
    "aggregations": {
        "creator_cust_id": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "1107221321",
                    "doc_count": 16,
                    "data_fields": {
                        "hits": {
                            "total": 16,
                            "max_score": 4.410047,
                            "hits": [
                                {
                                    "_index": "oereporting",
                                    "_type": "doc",
                                    "_id": "bd5098ea-9ba2-44b8-83ef-8d28aa63c2c7",
                                    "_score": 4.410047,
                                    "_source": {}
                                }
                            ]
                        }
                    }
                }
            ]
        }
    }
}

请注意,此处的 _source 字段为空。

我无法理解为什么会出现这样的差异,因为我在很多聚合中都使用了该查询,到目前为止没有任何错误。

我尝试过改用 Filter Aggregation,结果正常,但在我的场景中无法使用 Filter Aggregation。这种差异背后的逻辑是什么?如何在使用 query 的前提下解决该问题?

我知道这不会有任何区别,但我使用的是 Java High-Level REST Client,上述查询都是通过它生成的。

如果缺少任何信息或需要进一步解释,请在评论中提出。

0 个答案:

没有答案