我想统计 Elasticsearch 中特定字段的文档数量。我正在使用 Sense

时间:2016-04-13 21:18:21

标签: elasticsearch

我的查询如下:

GET testindexnew1/_search
{
   "fields": ["T","U", "UD", "UE", "gsaentity_executives","extension.viewport","extension.google-site-verification","extension.og:description"],
  "query": {
    "bool": {
      "should": [
        { "match": { "gsaentity_executives":  "firstname lastname" }},
        { "match": { "gsaentity_executives": "firstname2 lastname2"   }}
      ]
    }
  },
"size": 10, 

  "aggs": {
    "tags": {
      "significant_terms": {
        "field": "newContent"
      }
    }
  },

  "highlight" : {
        "fields" : {
            "newContent" : {"fragment_size" : 150, "number_of_fragments" : 1}
        }
    }
} 

我使用 match 查询来匹配高管姓名“firstname lastname”和“firstname2 lastname2”。此外,我使用聚合(此处为 significant_terms)来返回文档计数。我得到的结果如下:

{
  "took": 31,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 6,
    "max_score": 4.021257,
    "hits": [
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/blog",
        "_score": 4.021257,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/blog"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Search and Big Data Insights - Search Technologies Blog"
          ],
          "UD": [
            "http://www.searchtechnologies.com/blog"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname lastname, firstname2 lastname2"
          ],
          "UE": [
            "http://www.searchtechnologies.com/blog"
          ],
          "extension.og:description": [
            "Search Technologies is the largest IT services company dedicated to enterprise search and big data implementation, consulting and managed Services"
          ]
        }
      },
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/executive-team",
        "_score": 2.0106285,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/executive-team"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Search Technologies Executive Team"
          ],
          "UD": [
            "http://www.searchtechnologies.com/executive-team"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname lastname, firstname2 lastname2"
          ],
          "UE": [
            "http://www.searchtechnologies.com/executive-team"
          ],
          "extension.og:description": [
            "Our executive team is the most experienced in the search and analytics business with an average of more than 18 years experience"
          ]
        }
      },
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/solr-hadoop-integration",
        "_score": 1.3831896,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/solr-hadoop-integration"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Solr | Hadoop Integration"
          ],
          "UD": [
            "http://www.searchtechnologies.com/solr-hadoop-integration"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname2 lastname2"
          ],
          "UE": [
            "http://www.searchtechnologies.com/solr-hadoop-integration"
          ],
          "extension.og:description": [
            "Solr is the natural choice for searching over Hadoop data. Search Technologies is the leading IT services company dedicated to implementing enterprise search and unstructured big data applications."
          ]
        }
      },
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/news-adecco-improves-recruiter-efficiency-with-search",
        "_score": 1.1300961,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/news-adecco-improves-recruiter-efficiency-with-search"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Search Technologies Helps Adecco Group Significantly Improve Recruiter Efficiency"
          ],
          "UD": [
            "http://www.searchtechnologies.com/news-adecco-improves-recruiter-efficiency-with-search"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname lastname"
          ],
          "UE": [
            "http://www.searchtechnologies.com/news-adecco-improves-recruiter-efficiency-with-search"
          ],
          "extension.og:description": [
            "Custom Search and Match application based on Cloudera and Solr improves Adecco's recruiters' response times and fill rates."
          ]
        }
      },
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/search-big-data-videos",
        "_score": 1.1300961,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/search-big-data-videos"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Enterprise Search & Big Data Videos"
          ],
          "UD": [
            "http://www.searchtechnologies.com/search-big-data-videos"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname lastname"
          ],
          "UE": [
            "http://www.searchtechnologies.com/search-big-data-videos"
          ],
          "extension.og:description": [
            "A collection of videos discussing enterprise search and big data concepts, business applications, and real-world case stories."
          ]
        }
      },
      {
        "_index": "testindexnew1",
        "_type": "SearchTech",
        "_id": "http://www.searchtechnologies.com/faq",
        "_score": 1.1300961,
        "fields": {
          "U": [
            "http://www.searchtechnologies.com/faq"
          ],
          "extension.google-site-verification": [
            "jP1bIfjuuyZUYfTkYc_O6ZlTHxCm07voTDcMk72Z8oQ"
          ],
          "T": [
            "Search Technologies: Frequently Asked Questions (FAQ)"
          ],
          "UD": [
            "http://www.searchtechnologies.com/faq"
          ],
          "extension.viewport": [
            "width=device-width, initial-scale=1, maximum-scale=1"
          ],
          "gsaentity_executives": [
            "firstname lastname"
          ],
          "UE": [
            "http://www.searchtechnologies.com/faq"
          ],
          "extension.og:description": [
            "Search Technologies FAQ: Where did Search Technologies come from?   What are our key differentiators?  How do our customers view us?"
          ]
        }
      }
    ]
  },
  "aggregations": {
    "tags": {
      "doc_count": 6,
      "buckets": [
        {
          "key": "lastname",
          "doc_count": 5,
          "score": 13.055555555555557,
          "bg_count": 5
        },
        {
          "key": "firstname",
          "doc_count": 5,
          "score": 13.055555555555557,
          "bg_count": 5
        },
        {
          "key": "ceo",
          "doc_count": 5,
          "score": 13.055555555555557,
          "bg_count": 5
        },
        {
          "key": "lastname2",
          "doc_count": 3,
          "score": 7.833333333333333,
          "bg_count": 3
        },
        {
          "key": "candidates",
          "doc_count": 4,
          "score": 6.7407407407407405,
          "bg_count": 6
        },
        {
          "key": "retention",
          "doc_count": 3,
          "score": 5.75,
          "bg_count": 4
        },
        {
          "key": "firstname2",
          "doc_count": 3,
          "score": 5.75,
          "bg_count": 4
        },
        {
          "key": "known",
          "doc_count": 3,
          "score": 5.75,
          "bg_count": 4
        },
        {
          "key": "it’s",
          "doc_count": 3,
          "score": 5.75,
          "bg_count": 4
        },
        {
          "key": "said",
          "doc_count": 3,
          "score": 5.75,
          "bg_count": 4
        }
      ]
    }
  }
}

从结果中可以看到,我得到的是 lastname、firstname、lastname2、firstname2 等单个词项各自的文档计数:

 "aggregations": {
    "tags": {
      "doc_count": 6,
      "buckets": [
        {
          "key": "lastname",
          "doc_count": 5,
          "score": 13.055555555555557,
          "bg_count": 5
        },
        {
          "key": "firstname",
          "doc_count": 5,
          "score": 13.055555555555557,
          "bg_count": 5
        },

我希望查询按完整的高管姓名(例如“firstname lastname”、“firstname2 lastname2”等)统计文档数量,但实际得到的却是高管的名和姓各自的文档计数。

我是ES的新手,所以我可能错过了一些东西。谢谢你的帮助!

1 个答案:

答案 0 :(得分:0)

您需要一个自定义分析器,以及类似下面的映射,然后在对应的子字段上进行聚合。该分析器按逗号切分字段值并去除每个词项首尾的空白,因此每个完整姓名(如“firstname lastname”)会作为一个词项被索引:

{
  "settings": {
    "analysis": {
      "tokenizer": {
        "comma": {
          "type": "pattern",
          "pattern": ","
        }
      },
      "analyzer": {
        "comma": {
          "type": "custom",
          "tokenizer": "comma",
          "filter": [
            "trim"
          ]
        }
      }
    }
  },
  "mappings": {
    "SearchTech": {
      "properties": {
        "gsaentity_executives": {
          "type": "string",
          "fields": {
            "comma_separated": {
              "type": "string",
              "analyzer": "comma"
            }
          }
        }
      }
    }
  }
}

聚合(对 comma_separated 子字段使用 terms 聚合):

  "aggs": {
    "tags": {
      "terms": {
        "field": "gsaentity_executives.comma_separated",
        "size": 10
      }
    }
  }