示例文档:
{
"text": "this is my text",
"categories": [
{"category": "sample category"},
{"category": "local news"}
]
}
目前的映射是:
{
"topic": {
"properties": {
"categories": {
"properties": {
"category": {
"type": "string",
"store": "no",
"term_vector": "with_positions_offsets",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"include_in_all": "true",
"boost": 8,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
搜索查询:
{
"_source": false,
"query":{
"match":{
"categories.category":"news"
}
},
"aggs": {
"match_count": {
"terms" : {"field": "categories.category.raw"}
}
}
}
我想要的结果是:
{
...
"buckets": [
{
"key": "local news",
"doc_count": 1
}
]
...
}
结果实际上是(它汇总了所有匹配文档的 categories.category):
{
...
"buckets": [
{
"key": "local news",
"doc_count": 1
},{
"key": "sample category", //THIS PART IS NOT NEEDED
"doc_count": 1
}
]
...
}
是否可以在搜索过程中添加临时字段(temporary field)?在这种情况下,我们将所有匹配的 categories.category 命名为 categories.match_category,并通过此临时字段 categories.match_category 汇总?如果可以,我该怎么做?如果不可以,我又该怎么办?
答案 0 :(得分:2)
您的文档中包含多个子文档(即 categories 数组中的各个对象),而您只需要匹配其中的一部分,因此应该将映射改为 nested 类型,如下所示:
映射
{
"topic": {
"properties": {
"categories": {
"type":"nested",
"properties": {
"category": {
"type": "string",
"store": "no",
"term_vector": "with_positions_offsets",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"include_in_all": "true",
"boost": 8,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
然后您可以按如下方式执行查询:
{
"_source": false,
"query":{
"filtered":{
"query":{
"match":{
"categories.category":
{
"query" : "news",
"cutoff_frequency" : 0.001
}
}
}
}
},
"aggs": {
"categ": {
"nested" : {
"path" : "categories"
},
"aggs":{
"match_count": {
"terms" : {"field": "categories.category.raw"}
}
}
}
}
}
试试吧
答案 1 :(得分:1)
另一种方法如下,它更贴合您的具体需求:
映射
{
"topic": {
"properties": {
"categories": {
"type":"nested",
"properties": {
"category": {
"type": "string",
"store": "no",
"analyzer": "simple",
"include_in_all": "true",
"boost": 8,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
数据
{
"text": "this is my text",
"categories": [
{"category": "sample category"},
{"category": "local news"}
]
}
查询
{
"query":{
"nested":{
"path":"categories",
"query":{
"filtered":{
"query":{
"match":{
"categories.category":"news"
}
}
}
}
}
},
"aggs": {
"nest":{
"nested":{
"path":"categories"
},
"aggs":{
"filt":{
"filter" : {
"script": {
"script" : "doc['categories.category'].values.contains('news')"
}
},
"aggs":{
"match_count": {
"terms" : {"field": "categories.category.raw"}
}
}
}
}
}
}
}
生成结果
{
"_shards": {
"failed": 0,
"successful": 5,
"total": 5
},
"aggregations": {
"nest": {
"doc_count": 2,
"filt": {
"doc_count": 1,
"match_count": {
"buckets": [
{
"doc_count": 1,
"key": "local news"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
}
}
},
"hits": {
"hits": [],
"max_score": 0.0,
"total": 1
},
"timed_out": false,
"took": 3
}
这里需要注意的是,您必须根据自己的需要在聚合中编写自己的脚本过滤器;上面的示例是在我的 category 字段映射使用 simple 分析器的前提下才有效的。