如何在Elasticsearch中获得嵌套字段的不同值?

时间:2019-09-05 09:45:54

标签: elasticsearch nested kibana elasticsearch-dsl

我在Elasticsearch中有一个索引,其映射(反映了文档结构)如下:

{
  "bureau_data" : {
    "mappings" : {
      "dynamic_date_formats" : [
        "yyyy-MM-dd"
      ],
      "dynamic_templates" : [
        {
          "objects" : {
            "match_mapping_type" : "object",
            "mapping" : {
              "type" : "nested"
            }
          }
        }
      ],
      "properties" : {
        "raw_derived" : {
          "type" : "nested",
          "properties" : {
            "applications" : {
              "type" : "nested",
              "properties" : {
                "accounts" : {
                  "type" : "nested",
                  "properties" : {
                    "account_type_name" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "accounttypeid" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "applicationcreditreportaccountid" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "currentbalance" : {
                      "type" : "long"
                    },
                    "dayspastdue" : {
                      "type" : "long"
                    },
                    "institution_name" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "institutionid" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    }
                  }
                },
                "applicationcreditreportid" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "applicationid" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "createdat" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "creditbureautypeid" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "dateofbirth" : {
                  "type" : "date",
                  "format" : "yyyy-MM-dd"
                },
                "firstname" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "lastname" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "score" : {
                  "type" : "long"
                },
                "source_name" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "status" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "updatedat" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                }
              }
            },
            "dob" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "firstname" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "lastname" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "middlename" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "mobilephone" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "source" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            }
          }
        },
        "userid" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          },
          "fielddata" : true
        }
      }
    }
  }
}

上面是我的索引的映射。下面是我尝试过的查询:

GET /my_index/_search?size=0
{
  "aggs": {
    "nested_path": {
      "nested": {
        "path": "raw_derived.applications.accounts"
      },
      "aggs": {
        "distinct_values": {
          "cardinality": {
            "field": "raw_derived.applications.accounts.account_type.keyword"
          }
        }
      }
    }
  }
}

我想要account_type字段的不同值,这是一个嵌套字段。我尝试了上面的查询,但它只返回不同值的计数。以下是我的输出片段:

  "hits" : {
    "total" : {
      "value" : 50,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "nested_path" : {
      "doc_count" : 828,
      "distinct_values" : {
        "value" : 70
      }
    }
  }
}

我希望输出中包含account_type的各个不同值,但上面的输出只有计数。下面是我尝试过并且可以正常工作的另一个查询:

GET /bureau_data/_search?size=0
{
 "_source": "{aggregations}", 
 "aggs": {
   "unique": {
     "nested": {
       "path": "raw_derived.applications"
     },
         "aggs": {
           "score_unq": {
             "terms": {
               "field": "raw_derived.applications.source_name.keyword"
             }
           }
         }
       }
     }
   }

上面是我尝试过并且可以正常工作的查询。

# Setup: attach the packages the app uses and load the background image.
# shiny provides fluidPage(), renderPlot(), shinyApp(), etc. used further down.
library(shiny)
library(ggplot2)

# EBImage is installed through BiocManager. Install each package only when it
# is missing, so the installer is not re-run on every launch of the script.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("EBImage", quietly = TRUE)) {
  BiocManager::install("EBImage")
}
library(EBImage)

x <- readImage("U:/Sample/floor plan sample.png")

# Scale to a specific width and height
LayoutJPG <- resize(x, w = 500, h = 500)
display(LayoutJPG)


   # UI definition: one full-width row containing a single plot output.
   # The plot reports double-clicks as "plot1_dblclick" and brush selections
   # as "plot1_brush".
   ui <- fluidPage(
     fluidRow(
       column(
         width = 12,
         plotOutput(
           outputId = "plot1",
           height = 300,
           dblclick = "plot1_dblclick",
           brush = brushOpts(id = "plot1_brush", resetOnNew = TRUE)
         )
       )
     )
   )

   # Server logic: draws the mtcars scatter plot on top of the background
   # image and keeps the current zoom window in reactive state.
   server <- function(input, output) {

     # Axis limits currently applied to the plot; both start out as NULL.
     ranges <- reactiveValues(x = NULL, y = NULL)

     output$plot1 <- renderPlot({
       # Background layer: the image sized to 1 npc in both directions.
       background <- grid::rasterGrob(
         LayoutJPG,
         width = unit(1, "npc"),
         height = unit(1, "npc")
       )

       ggplot(mtcars, aes(wt, mpg)) +
         annotation_custom(background, -Inf, Inf, -Inf, Inf) +
         geom_point() +
         coord_cartesian(xlim = ranges$x, ylim = ranges$y, expand = FALSE)
     })

     # On double-click: zoom to the brushed region when a brush exists,
     # otherwise clear the stored limits to reset the zoom.
     observeEvent(input$plot1_dblclick, {
       selection <- input$plot1_brush
       if (is.null(selection)) {
         ranges$x <- NULL
         ranges$y <- NULL
       } else {
         ranges$x <- c(selection$xmin, selection$xmax)
         ranges$y <- c(selection$ymin, selection$ymax)
       }
     })
   }

          # Build the Shiny app object from the UI and server objects.
          shinyApp(ui = ui, server = server)

任何建议都会有所帮助

1 个答案:

答案 0 :(得分:1)

根据官方文档,基数聚合(cardinality aggregation)是一种单值指标聚合,用于计算不同值的近似计数。这些值可以从文档中的特定字段提取,也可以由脚本生成。

因此,请不要使用“cardinality”聚合,而是改用如下的词项聚合(terms aggregation):

{
"size":0,
"aggregations": {
    "distinct_values": {
      "terms": {
        "field": "raw_derived.applications.accounts.account_type.keyword",
        "size": 1000,
        "min_doc_count": 1,
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      }
    }
}
}