Elasticsearch,嵌套聚合

时间:2015-10-07 23:50:25

标签: elasticsearch nested aggregation

我正在编写动态查询生成功能,它允许按映射中任意字段的组合进行聚合。如下面的映射(已截断)所示,部分字段位于嵌套(nested)类型中。例如,按 [activities.activity, duration]、[activities.activity, activities.duration] 或 [applicationName, duration] 进行聚合。

映射:(注:紧随其后的 XML 是一段 Android 布局,与本问题无关,疑为页面抓取错误;实际的 Elasticsearch 映射是再往后以 `nested: {` 开头的片段。)

<android.support.design.widget.CoordinatorLayout
android:id="@+id/main_content"
xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
android:fitsSystemWindows="true">

<android.support.design.widget.AppBarLayout
    android:id="@+id/appbar"
    android:layout_width="match_parent"
    android:layout_height="300px"
    android:fitsSystemWindows="true"
    android:theme="@style/ThemeOverlay.AppCompat.Dark.ActionBar">

    <android.support.design.widget.CollapsingToolbarLayout
        android:id="@+id/collapsing_toolbar"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:fitsSystemWindows="true"
        app:layout_scrollFlags="scroll|exitUntilCollapsed">

        <ImageView
            android:id="@+id/backdrop"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:paddingTop="?attr/actionBarSize"
            android:fitsSystemWindows="true"
            android:scaleType="centerCrop"
            app:layout_collapseMode="parallax"/>

        <android.support.v7.widget.Toolbar
            android:id="@+id/myToolbar"
            android:layout_width="match_parent"
            android:layout_height="?attr/actionBarSize"
            android:background="?attr/colorPrimary"
            app:layout_collapseMode="pin"
            app:popupTheme="@style/ThemeOverlay.AppCompat.Light">
            <TextView android:id="@+id/textViewTitle"  
                android:layout_width="match_parent"
                android:layout_height="wrap_content"/>
            </android.support.v7.widget.Toolbar>
    </android.support.design.widget.CollapsingToolbarLayout>
</android.support.design.widget.AppBarLayout>

<android.support.v4.widget.NestedScrollView
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    app:layout_behavior="@string/appbar_scrolling_view_behavior">

    <!-- my content that scrolls over the backdrop image -->

</android.support.v4.widget.NestedScrollView>

映射(Elasticsearch 索引映射,已截断):

{
nested: {
    properties: {
        @timestamp: {
            type: "date",
            format: "dateOptionalTime"
        },
        activities: {
            type: "nested",
            include_in_parent: true,
            properties: {
                activity: {
                    type: "string",
                    index: "not_analyzed"
                },
                duration: {
                    type: "long"
                },
                entry: {
                    properties: {
                        blockName: {
                            type: "string",
                            index: "not_analyzed"
                        },
                        blockid: {
                            type: "string"
                        },
                        time: {
                            type: "date",
                            format: "dateOptionalTime"
                        }
                    }
                },
                exit: {
                    properties: {
                        blockName: {
                            type: "string",
                            index: "not_analyzed"
                        },
                        blockid: {
                            type: "string"
                        },
                        time: {
                            type: "date",
                            format: "dateOptionalTime"
                        }
                    }
                },
                seq: {
                    type: "integer"
                }
            }
        },
        applicationName: {
            type: "string",
            index: "not_analyzed"
        },
        duration: {
            type: "long"
        }
    }
}}

示例文档:

{
"@timestamp": "2015-09-15T17:35:24.020Z",
"duration": "37616",
"applicationName": "my application name",
"activities": [{
    "duration": "20362",
    "entry": {
        "blockid": "2",
        "time": "2015-09-15T17:35:24.493Z",
        "blockName": "My Self Service"
    },
    "exit": {
        "blockid": "2",
        "time": "2015-09-15T17:35:44.855Z",
        "blockName": "My Self Service"
    },
    "seq": 1,
    "activity": "Prompter v2.3"
}, {
    "duration": "96",
    "entry": {
        "blockid": "2",
        "time": "2015-09-15T17:35:45.268Z",
        "blockName": "My Self Service"
    },
    "exit": {
        "blockid": "2",
        "time": "2015-09-15T17:35:45.364Z",
        "blockName": "My Self Service"
    },
    "seq": 2,
    "activity": "Start v2.5"
}, {
    "duration": "15931",
    "entry": {
        "blockid": "2",
        "time": "2015-09-15T17:35:45.669Z",
        "blockName": "My Self Service"
    },
    "exit": {
        "blockid": "2",
        "time": "2015-09-15T17:36:01.600Z",
        "blockName": "My Self Service"
    },
    "seq": 3,
    "activity": "System v2.3"
}]}

问题:正如您在查询中看到的那样,这里是在按嵌套字段分组之后,对根级别的字段求平均值。必须加入 reverse_nested,才能访问根级别的字段“duration”。这意味着在生成查询时,我们需要检查字段组合,判断父/子字段是否为嵌套字段、是否位于同一嵌套路径下或位于根级别,然后才能生成正确的查询。当按更多字段聚合时,情况会更复杂,例如按 [applicationName, activities.duration, duration, activities.activity] 聚合。有谁知道更优雅的做法吗?如果可以指定绝对路径,逻辑可能会更简单。

1 个答案:

答案 0 :(得分:1)

这不是对我的问题的回答,而是补充一些示例,希望能帮助其他人更好地理解嵌套聚合。

      aggs field  average field 
case1 yes         yes
case2 yes         no
case3 no          yes
case4 no          no

yes -> 嵌套类型,no -> 非嵌套类型

Case1,两个字段位于同一嵌套路径下

查询

{
"size": 0,
"aggs": {
    "dim0": {
        "nested" : {
            "path": "activities"
        },
        "aggs": {
            "dim1": {
                "terms": {
                    "field": "activities.activity"
                },
                "aggs":{
                    "avg_duration": {
                        "avg": {
                            "field": "activities.duration"
                        }
                    }
                }
            }
        }
    }
}}

结果:

{
"took": 1,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim0": {
        "doc_count": 3,
        "dim1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                "key": "Prompter v2.3",
                "doc_count": 1,
                "avg_duration": {
                    "value": 20362.0
                }
            }, {
                "key": "Start v2.5",
                "doc_count": 1,
                "avg_duration": {
                    "value": 96.0
                }
            }, {
                "key": "System v2.3",
                "doc_count": 1,
                "avg_duration": {
                    "value": 15931.0
                }
            }]
        }
    }
}}

Case1,两个字段都是嵌套字段,但使用了 reverse_nested:每个桶得到的都是所有“activities.duration”的同一个平均值

查询

{
"size": 0,
"aggs": {
    "dim0": {
        "nested" : {
            "path": "activities"
        },
        "aggs": {
            "dim1": {
                "terms": {
                    "field": "activities.activity"
                },
                "aggs": {
                    "dim_reverse1":{
                        "reverse_nested":{
                        },
                        "aggs":{
                            "avg_duration": {
                                "avg": {
                                    "field": "activities.duration"
                                }
                            }
                        }
                    }
                }
            }                
        }
    }
}}

结果

{
"took": 2,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim0": {
        "doc_count": 3,
        "dim1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                "key": "Prompter v2.3",
                "doc_count": 1,
                "dim_reverse1": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 12129.666666666666
                    }
                }
            }, {
                "key": "Start v2.5",
                "doc_count": 1,
                "dim_reverse1": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 12129.666666666666
                    }
                }
            }, {
                "key": "System v2.3",
                "doc_count": 1,
                "dim_reverse1": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 12129.666666666666
                    }
                }
            }]
        }
    }
}}

Case3

查询

{
"size": 0,
"aggs": {
    "dim1": {
        "terms": {
            "field": "applicationName"
        },
        "aggs":{
            "avg_duration": {
                "avg": {
                    "field": "activities.duration"
                }
            }
        }
    }
}}

结果

{
"took": 2,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim1": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [{
            "key": "my application name",
            "doc_count": 1,
            "avg_duration": {
                "value": 12129.666666666666
            }
        }]
    }
}}

Case2,包含 reverse_nested 以返回根级别

查询

{
"size": 0,
"aggs": {
    "dim0": {
        "nested" : {
            "path": "activities"
        },
        "aggs": {
            "dim1": {
                "terms": {
                    "field": "activities.activity"
                },
                "aggs": {
                    "dim_reverse":{
                        "reverse_nested":{},
                        "aggs":{
                            "avg_duration": {
                                "avg": {
                                    "field": "duration"
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}}

结果:

{
"took": 2,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim0": {
        "doc_count": 3,
        "dim1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                "key": "Prompter v2.3",
                "doc_count": 1,
                "dim_reverse": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 37616.0
                    }
                }
            }, {
                "key": "Start v2.5",
                "doc_count": 1,
                "dim_reverse": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 37616.0
                    }
                }
            }, {
                "key": "System v2.3",
                "doc_count": 1,
                "dim_reverse": {
                    "doc_count": 1,
                    "avg_duration": {
                        "value": 37616.0
                    }
                }
            }]
        }
    }
}}

Case2,未指定嵌套路径

查询

{
"size": 0,
"aggs": {
    "dim1": {
        "terms": {
            "field": "activities.activity"
        },
        "aggs":{
            "avg_duration": {
                "avg": {
                    "field": "duration"
                }
            }
        }
    }
}}

结果:与前一个结果相同

{
"took": 2,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim1": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [{
            "key": "Prompter v2.3",
            "doc_count": 1,
            "avg_duration": {
                "value": 37616.0
            }
        }, {
            "key": "Start v2.5",
            "doc_count": 1,
            "avg_duration": {
                "value": 37616.0
            }
        }, {
            "key": "System v2.3",
            "doc_count": 1,
            "avg_duration": {
                "value": 37616.0
            }
        }]
    }
}

}

Case2,未指定 reverse_nested,无法访问根级别的“duration”字段

查询

{
"size": 0,
"aggs": {
    "dim0": {
        "nested" : {
            "path": "activities"
        },
        "aggs": {
            "dim1": {
                "terms": {
                    "field": "activities.activity"
                },
                "aggs":{
                    "avg_duration": {
                        "avg": {
                            "field": "duration"
                        }
                    }
                }
            }
        }
    }
}}

结果

{
"took": 2,
"timed_out": false,
"_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "dim0": {
        "doc_count": 3,
        "dim1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                "key": "Prompter v2.3",
                "doc_count": 1,
                "avg_duration": {
                    "value": null
                }
            }, {
                "key": "Start v2.5",
                "doc_count": 1,
                "avg_duration": {
                    "value": null
                }
            }, {
                "key": "System v2.3",
                "doc_count": 1,
                "avg_duration": {
                    "value": null
                }
            }]
        }
    }
}}