Question

我使用elasticsearch存储我的生物数据。

我尝试使用带过滤的 aggs 进行查询，但返回的数据不是我想要的。问题在于每个标本都有一个 "d_" 属性，它是一个数组。我只需要对这个数组中的某些元素做 aggs，但我无法把它们过滤出来。

// 我手动编辑了数据以便更容易理解，其中可能有一些拼写错误

我的数据示例：

   [    {
        "_index": "botanique",
        "_type": "specimens",
        "_id": "227CB8A3E2834AAEB50B1ECF6B672180",
        "_score": 1,
        "_source": {
            ....
            "d_": [
                {     // -------------- dont want this
                    "taxonid": "BB7C33A3126648D095BEDDABB0BD2758",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(Sw.) Tindale"
                },
                {    // -------------- want this
                    "taxonid": "704FC303D7F74C02912D0FEB5C6FC55D",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(sw.) copel."
                }
            ]
        }
    } , {
        "_index": "botanique",
        "_type": "specimens",
        "_id": "11A22DE8E4AD45BBAC7783E508079DCD",
        "_score": 1,
        "_source": {
            ....
            "d_": [
                {     // -------------- want this
                    "taxonid": "A94D243348DF4CAD926B6C3965D948A3",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(Sw.) Ching",
                }                   ,
                {    // -------------- dont want this
                    "taxonid": "B01A89AA961A46F2984722C311DC2BDD",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(willd. ex schkuhr) proctor"
                }
            ]
        }
    },{
        "_index": "botanique",
        "_type": "specimens",
        "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
        "_score": 1,
        "_source": {
            ...
            "d_": [
                {    // -------------- want this
                    "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(Sw.) Ching"
                } ,
                {    // -------------- dont want this
                    "taxonid": "011E5DA526FC4098953DBD1F9E5F4424",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(Sw.) Tindale",
                }
            ]
        }
    }
]

例如，我希望获得所有 "d_.scientificname" 等于 "parapolystichum effusum" 的元素的 "d_.scientificnameauthorship" 和 "d_.taxonid"。所以我应该（希望）得到 "scientificnameauthorship"："(sw.) copel."、"(Sw.) Ching"，而不是 "(willd. ex schkuhr) proctor"。但我没能做到……

我的查询：

{
  "_source": ["d_" ],
  "size": 3,
  "query": {
    "filtered": {"filter": {"bool": {"must": [{"term": {
                "d_.scientificname": "parapolystichum effusum"
    }}] } }}
  },
  "aggs": {
    "scientificname": {
      "terms": {
        "field": "d_.scientificname",
        "size": 1,
        "include": {
          "pattern": "parapolystichum effusum",
          "flags": "CANON_EQ|CASE_INSENSITIVE"
        }
      },
      "aggs": {
        "scientificnameauthorship": {
          "terms": {
            "field": "d_.scientificnameauthorship",
            "size": 10
          }
        }
      }
    }
  }
}

返回的数据包含了这些标本的所有 "scientificnameauthorship"：

{
    "aggregations": {
        "scientificname": {
            "buckets": [{
                "key": "parapolystichum effusum",
                "doc_count": 269,
                "scientificnameauthorship": {
                    "buckets": [
                        {   // ------ want this 
                            "key": "(sw.) ching",
                            "doc_count": 269
                        }                        ,
                        {   // ------ want this 
                            "key": "(sw.) copel.",
                            "doc_count": 34
                        }                        , 
                        {   // ------ dont want this 
                            "key": "(sw.) tindale",
                            "doc_count": 262
                        }                        ,
                        {   // ------ dont want this 
                            "key": "(willd. ex schkuhr) proctor",
                            "doc_count": 7
                        }                        ,
                        {   // ------ dont want this 
                            "key": "fée",
                            "doc_count": 2
                        }
                    ]
                }
            }]
        }
    }
}

如何修改查询，以便在 aggs 中进行过滤？
如何在 hits（命中结果）中只获取数组中匹配的元素？

得到这个：

{   
    "hits": {
        "total": 269,
        "max_score": 1,
        "hits": [
            {
                "_index": "botanique",
                "_type": "specimens",
                "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
                "_score": 1,
                "_source": {
                    ...
                    "d_": [{    // -------------- want this
                            "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                            "scientificname": "Parapolystichum effusum",
                            "scientificnameauthorship": "(Sw.) Ching"
                        }]
                }                       
            }
    }
}

而不是：

{   
    "hits": {
        "total": 269,
        "max_score": 1,
        "hits": [
            {
                "_index": "botanique",
                "_type": "specimens",
                "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
                "_score": 1,
                "_source": {
                    ...
                    "d_": [
                        {    // -------------- want this
                            "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                            "scientificname": "Parapolystichum effusum",
                            "scientificnameauthorship": "(Sw.) Ching"
                        } ,
                        {    // -------------- dont want this
                            "taxonid": "011E5DA526FC4098953DBD1F9E5F4424",
                            "scientificname": "Lastreopsis effusa",
                            "scientificnameauthorship": "(Sw.) Tindale",
                        }
                    ]
                }
            }
    }
}

非常感谢

// 编辑1

我也试过像这样在 aggs 中放一个过滤器，但不起作用：

{
    "query": {
        "filtered": {"filter": {"bool": {"must": [{"term": {
                    "d_.scientificname": "parapolystichum effusum"
        }}] } }}
    },
    "aggs" : {
        "scientificname" : {
            "filter" : {"term": {
                    "d_.scientificname": "parapolystichum effusum"
            }},
            "aggs": {
                "scientificnameauthorship": {
                  "terms": {
                    "field": "d_.scientificnameauthorship",
                    "size": 10
                  }
                }
              }
        }
    }
}

Answer 1

您可以使用 nested（嵌套）聚合作为父聚合，然后在父聚合内部创建一个 filter 聚合来过滤数组（列表数据），并再附加一个子聚合来做 terms 聚合。参考文档：https://www.elastic.co/guide/en/elasticsearch/reference/1.4/search-aggregations-bucket-nested-aggregation.html 。示例查询：

"filteredaggs" : {
          "nested" : {
            "path" : "D_"
          },
          "aggs" : {
            "maxdays" : {
              "filter" : {
                "terms" : {
                  "scientificname" : ["xyz", "pqr"]
                }
              },
              "aggs" : {
                "myfinalaggregator" : {
                  "terms" : {
                    "field" : "scientificnameauthorship"
                  }
                }
              }
            }
          }
        }

希望这对你有用。

Answer 2

终于找到了答案，感谢 user3775217 指出了 "nested"（嵌套）这个方向：

{
    "_source" : false,
    "size" : 0,
    "query" : {
        "filtered" : {
            "filter" : {
                "bool" : {
                    "must" : [{
                            "nested" : {
                                "path" : "d_",
                                "query" : {
                                    "bool" : {
                                        "must" : [{
                                                "wildcard" : {
                                                    "d_.scientificname" : {
                                                        "value" : "parapolystichum effusum*"
                                                    }
                                                }
                                            }
                                        ]
                                    }
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "general" : {
            "nested" : {"path" : "d_"},
            "aggs" : {
                "scientificname" : {
                    "terms" : {
                        "field" : "d_.scientificname",
                        "size" : 20,
                        "include" : {
                            "pattern" : "parapolystichum effusum*",
                            "flags" : "CANON_EQ|CASE_INSENSITIVE"
                        }
                    },
                    "aggs" : {
                        "scientificnameauthorship" : {
                            "terms" : {
                                "field" : "d_.scientificnameauthorship",
                                "size" : 10
                            }
                        }
                    }
                }
            }
        }
    }
}

欢迎任何建议，尤其是更简短且可行的答案。

elasticsearch使用aggs过滤数组数据

2 个答案: