Elasticsearch:是否可以在搜索请求期间生成一个"临时字段"?

时间:2016-03-08 07:53:16

标签: elasticsearch

示例文档:

{
    "text": "this is my text",
    "categories": [
        {"category": "sample category"},
        {"category": "local news"}
    ]
}

目前的映射是:

{
    "topic": {
        "properties": {
            "categories": {
                "properties": {
                    "category": {
                        "type": "string",
                        "store": "no",
                        "term_vector": "with_positions_offsets",
                        "analyzer": "ik_max_word",
                        "search_analyzer": "ik_max_word",
                        "include_in_all": "true",
                        "boost": 8,
                        "fields": {
                            "raw": {
                                "type": "string",
                                "index": "not_analyzed"
                            }
                        }
                    }
                }
            }
        }
    }
}

搜索查询:

{
    "_source": false,
    "query":{
        "match":{
            "categories.category":"news"
        }
    },
    "aggs": {
        "match_count": {
            "terms" : {"field": "categories.category.raw"}
        }
    }
}

我想要的结果是:

{
    ...
    "buckets": [
        {
            "key": "local news",
            "doc_count": 1
        }
    ]
    ...
}

但实际得到的结果是(它聚合了所有匹配文档的 categories.category):

{
    ...
    "buckets": [
        {
            "key": "local news",
            "doc_count": 1
        },
        {
            "key": "sample category", //THIS PART IS NOT NEEDED
            "doc_count": 1
        }
    ]
    ...
}

是否可以在搜索过程中添加一个临时字段(temporary field)?在这种情况下,比如把所有匹配到的 categories.category 命名为 categories.match_category,然后按这个临时字段 categories.match_category 进行聚合?如果可以,我该怎么做;如果不可以,我又该怎么办?

2 个答案:

答案 0 :(得分:2)

您的文档内部包含多个子文档,并且只需要与其中的一部分进行匹配,因此应该将映射改为 nested(嵌套)类型,如下所示:

映射

{
    "topic": {
        "properties": {
            "categories": {
                "type":"nested",
                "properties": {
                    "category": {
                        "type": "string",
                        "store": "no",
                        "term_vector": "with_positions_offsets",
                        "analyzer": "ik_max_word",
                        "search_analyzer": "ik_max_word",
                        "include_in_all": "true",
                        "boost": 8,
                        "fields": {
                            "raw": {
                                "type": "string",
                                "index": "not_analyzed"
                            }
                        }
                    }
                }
            }
        }
    }
}

然后您可以按如下方式执行查询

{
    "_source": false,
    "query":{
      "filtered":{
         "query":{
            "match":{
               "categories.category":
               {
                 "query" : "news",
                 "cutoff_frequency" : 0.001
               }
            }
         }
      }
    },
    "aggs": {
        "categ": {
           "nested" : {
             "path" : "categories"
           },
           "aggs":{
             "match_count": {       
               "terms" : {"field": "categories.category.raw"}
             }
           }
        }
    }
}

试试吧

答案 1 :(得分:1)

还有另一种方法,它更有针对性地满足您的需求,具体如下:

映射

 {
    "topic": {
        "properties": {
            "categories": {
                "type":"nested",
            "properties": {
                    "category": {
                        "type": "string",
                        "store": "no",
                        "analyzer": "simple",
                        "include_in_all": "true",
                        "boost": 8,
                        "fields": {
                            "raw": {
                                "type": "string",
                                "index": "not_analyzed"
                            }
                        }
                    }
                }
            }
        }
    }
}

数据

{
    "text": "this is my text",
    "categories": [
        {"category": "sample category"},
        {"category": "local news"}
    ]
}

查询

{
  "query":{
    "nested":{
      "path":"categories",
      "query":{
        "filtered":{
          "query":{
            "match":{
              "categories.category":"news"
            }
          }
        }
      }
    }
  },
  "aggs": {
    "nest":{
      "nested":{
        "path":"categories"

      },
      "aggs":{
        "filt":{
          "filter" : {
            "script": {
              "script" : "doc['categories.category'].values.contains('news')"
            }
          },
          "aggs":{
            "match_count": {
              "terms" : {"field": "categories.category.raw"}
            }
          }
        }
      }
    }
  }
}

生成结果

{
    "_shards": {
        "failed": 0, 
        "successful": 5, 
        "total": 5
    }, 
    "aggregations": {
        "nest": {
            "doc_count": 2, 
            "filt": {
                "doc_count": 1, 
                "match_count": {
                    "buckets": [
                        {
                            "doc_count": 1, 
                            "key": "local news"
                        }
                    ], 
                    "doc_count_error_upper_bound": 0, 
                    "sum_other_doc_count": 0
                }
            }
        }
    }, 
    "hits": {
        "hits": [], 
        "max_score": 0.0, 
        "total": 1
    }, 
    "timed_out": false, 
    "took": 3
}

这里需要注意的是,您必须根据自己的需求,在聚合中编写自己的脚本过滤器;上面的示例在我这里可行,其中 "category" 字段的映射使用的是 simple 分析器。