ElasticSearch:对字符串字段中的所有词元(token)进行聚合

时间:2017-02-19 04:05:44

标签: elasticsearch aggregation

我使用的是ElasticSearch 2.4,正在尝试对一个包含多个词元(token)的String类型文本字段进行聚合。该字段是一个名为mailingAddress的地址字段。例如,下面是在地址字段中查找NY的查询,以及它返回的部分结果。

{
  "from": 0,
  "size": 100,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ],
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "must": [
              {
                "match": {
                  "customerprofile.mailingAddress": {
                    "query": "NY",
                    "fuzziness": 0,
                    "operator": "or"
                  }
                }
              },
              {
                "match": {
                  "customerprofile.companyId": {
                    "query": "999",
                    "fuzziness": 0,
                    "operator": "or"
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }
}

返回

"hits":[  
   {  
      "_index":"wht_index_prod_v33_es24",
      "_type":"customerprofile",
      "_id":"2044",
      "_score":2.9787974,
      "_source":{  
         "customerId":2044,
         "companyId":2007,
         "fullName":"John Doe",
         "email":"jon@aol.com",
         "pictureURL":"john.png",
         "profilePictureContentType":"image/png",
         "phone":"(703) 999-8888",
         "mailingAddress":"100 Lake Braddock Drive\nBurke, NY 22015",
         "gender":"Male",
         "emergencyContactsIds":[  

         ],
         "wantCorrespondence":false
      }
   },
   {  
      "_index":"wht_index_prod_v33_es24",
      "_type":"customerprofile",
      "_id":"2045",
      "_score":2.9787974,
      "_source":{  
         "customerId":2045,
         "companyId":2007,
         "fullName":"Jane Anderson",
         "email":"janea@touchva.net",
         "pictureURL":"JAnderson.png",
         "profilePictureContentType":"image/png",
         "phone":"(434) 111-2345",
         "mailingAddress":"PO Box 333, Boydton, NY 23917",
         "gender":"Male",
         "emergencyContactsIds":[  

         ],
         "wantCorrespondence":false
      }
   },
..
..
]

问题
当我按mailingAddress进行聚合时,我期望文本字段中的每个单词都有一个对应的桶。根据上面的搜索结果,我还期望看到一个键为'NY'的桶,但聚合结果中并没有。有人能解释一下原因吗?我的猜测是包含它的文档太少了?

聚合:

{
  "size": 0,
  "aggs": {
    "group_by_age": {
      "terms": {
        "field": "mailingAddress"
      },
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "gender"
          }
        }
      }
    }
  }
}

聚合结果:

{
  "took": 16,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 401,
    "max_score": 0,
    "hits": [

    ]
  },
  "aggregations": {
    "group_by_age": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 1041,
      "buckets": [
        {
          "key": "st",
          "doc_count": 30,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 17
              },
              {
                "key": "male",
                "doc_count": 13
              }
            ]
          }
        },
        {
          "key": "ca",
          "doc_count": 28,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 21
              },
              {
                "key": "male",
                "doc_count": 7
              }
            ]
          }
        },
        {
          "key": "dr",
          "doc_count": 16,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 13
              },
              {
                "key": "male",
                "doc_count": 3
              }
            ]
          }
        },
        {
          "key": "street",
          "doc_count": 15,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 11
              },
              {
                "key": "male",
                "doc_count": 4
              }
            ]
          }
        },
        {
          "key": "ave",
          "doc_count": 14,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 7
              }
            ]
          }
        },
        {
          "key": "box",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 9
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "fl",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 9
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "va",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "male",
                "doc_count": 6
              },
              {
                "key": "female",
                "doc_count": 5
              }
            ]
          }
        },
        {
          "key": "n",
          "doc_count": 10,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 3
              }
            ]
          }
        },
        {
          "key": "az",
          "doc_count": 9,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        }
      ]
    }
  }
}

1 个答案:

答案 0 :(得分:1)

默认情况下,terms聚合只返回文档数最多的前10个词项(term),但您可以在聚合中指定size来返回更多词项,如下所示:

{
  "size": 0,
  "aggs": {
    "group_by_age": {
      "terms": {
        "field": "mailingAddress",
        "size": 50                       <---- add this
      },
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "gender"
          }
        }
      }
    }
  }
}

实际效果因数据而异,您可能需要进一步增大size才能真正看到NY。另外请注意,从上面的聚合结果可以看出桶键都是小写的(如st、ca、va),因此对应的桶键应为ny而不是NY。