我在 Elasticsearch 中的索引映射(mapping)如下:
{
"bureau_data" : {
"mappings" : {
"dynamic_date_formats" : [
"yyyy-MM-dd"
],
"dynamic_templates" : [
{
"objects" : {
"match_mapping_type" : "object",
"mapping" : {
"type" : "nested"
}
}
}
],
"properties" : {
"raw_derived" : {
"type" : "nested",
"properties" : {
"applications" : {
"type" : "nested",
"properties" : {
"accounts" : {
"type" : "nested",
"properties" : {
"account_type_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"accounttypeid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"applicationcreditreportaccountid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"currentbalance" : {
"type" : "long"
},
"dayspastdue" : {
"type" : "long"
},
"institution_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"institutionid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"applicationcreditreportid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"applicationid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"createdat" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"creditbureautypeid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"dateofbirth" : {
"type" : "date",
"format" : "yyyy-MM-dd"
},
"firstname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lastname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"score" : {
"type" : "long"
},
"source_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"status" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"updatedat" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"dob" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"firstname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lastname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"middlename" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"mobilephone" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"source" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"userid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"fielddata" : true
}
}
}
}
}
下面是我执行的查询:
GET /my_index/_search?size=0
{
"aggs": {
"nested_path": {
"nested": {
"path": "raw_derived.applications.accounts"
},
"aggs": {
"distinct_values": {
"cardinality": {
"field": "raw_derived.applications.accounts.account_type.keyword"
}
}
}
}
}
}
我想获取嵌套字段 account_type 的所有不同取值。我尝试了上面的查询,但它只返回不同取值的数量,输出如下:
"hits" : {
"total" : {
"value" : 50,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"nested_path" : {
"doc_count" : 828,
"distinct_values" : {
"value" : 70
}
}
}
}
我希望输出中包含 account_type 的各个不同取值,而上面的输出只有计数。下面是我尝试过的另一个查询:
GET /bureau_data/_search?size=0
{
"_source": "{aggregations}",
"aggs": {
"unique": {
"nested": {
"path": "raw_derived.applications"
},
"aggs": {
"score_unq": {
"terms": {
"field": "raw_derived.applications.source_name.keyword"
}
}
}
}
}
}
下面是我尝试过的查询及其工作方式:
# Load the packages used by the app. shiny supplies fluidPage(), plotOutput(),
# reactiveValues(), renderPlot(), observeEvent() and shinyApp(), which the
# code below calls; without it the script fails at the first fluidPage() call.
library(shiny)
library(ggplot2)

# EBImage is installed through Bioconductor's BiocManager. Install only when
# it is missing so the script does not hit the network on every launch.
if (!requireNamespace("EBImage", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("EBImage")
}
library(EBImage)

# Read the image that is later drawn behind the scatter plot.
x <- readImage("U:/Sample/floor plan sample.png")
# Scale to a specific width and height
LayoutJPG <- resize(x, w = 500, h = 500)
# Show the resized image as a quick visual check.
display(LayoutJPG)
# UI definition: one full-width row containing a single plot ("plot1").
# The plot reports double-clicks as input$plot1_dblclick and the brushed
# region as input$plot1_brush; both are consumed by the server function.
ui <- fluidPage(
  fluidRow(
    column(
      width = 12,
      plotOutput(
        outputId = "plot1",
        height = 300,
        dblclick = "plot1_dblclick",
        brush = brushOpts(id = "plot1_brush", resetOnNew = TRUE)
      )
    )
  )
)
# Server logic: draws mtcars (wt vs mpg) on top of the LayoutJPG image and
# lets the user zoom by brushing a region and then double-clicking.
server <- function(input, output) {
  # Axis limits currently applied to the plot; both start as NULL.
  ranges <- reactiveValues(x = NULL, y = NULL)

  output$plot1 <- renderPlot({
    # Image grob sized to fill the whole annotation area.
    background <- grid::rasterGrob(
      LayoutJPG,
      width = unit(1, "npc"),
      height = unit(1, "npc")
    )

    ggplot(mtcars, aes(wt, mpg)) +
      annotation_custom(background, -Inf, Inf, -Inf, Inf) +
      geom_point() +
      coord_cartesian(xlim = ranges$x, ylim = ranges$y, expand = FALSE)
  })

  # On double-click: with no active brush, reset the zoom; otherwise zoom to
  # the bounds of the brushed region.
  observeEvent(input$plot1_dblclick, {
    brush <- input$plot1_brush
    if (is.null(brush)) {
      ranges$x <- NULL
      ranges$y <- NULL
    } else {
      ranges$x <- c(brush$xmin, brush$xmax)
      ranges$y <- c(brush$ymin, brush$ymax)
    }
  })
}
# Build the Shiny app object from the UI definition and the server function.
shinyApp(ui = ui, server = server)
任何建议都会有所帮助
答案 0 :(得分:1)
根据官方文档,基数聚合(cardinality aggregation)是一种单值指标聚合,用于计算不同值的近似数量;这些值可以从文档的特定字段中提取,也可以由脚本生成。因此它只会返回计数,而不会返回具体的取值。
不要使用 cardinality(基数)聚合,请改用如下的 terms(词项)聚合:
{
"size":0,
"aggregations": {
"distinct_values": {
"terms": {
"field": "raw_derived.applications.accounts.account_type.keyword",
"size": 1000,
"min_doc_count": 1,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
}
}