我创建了一个具有父子join
数据类型的映射。
我想在单个查询中获取每个父文档下具有最大值的那个子文档。
这有可能做到吗?我尝试过一些方法,例如定义 inner_hits,以及使用 top_hits、children、has_parent 和 has_child 之类的聚合。
我的映射基于这个 elasticsearch_dsl 示例中的 Post、Question 和 Answer 类。
使用elasticsearch_dsl代码的解决方案会很好,但是简单的elasticsearch查询也会有所帮助。
谢谢:)
编辑:我附上了我的代码,希望能有所帮助。
LoggerLogBase
类(基于Post
类):
class LoggerLogBase(Document):
    """
    Shared base of the :class:`~data_classes.Log` and
    :class:`~data_classes.Logger` data classes.
    """

    # Join field declaring the relation: 'logger' is the parent of 'log'.
    logger_log = Join(relations={'logger': 'log'})

    class Index:
        """
        Holds the name of the index.
        """
        name = 'logger-log'

    @classmethod
    def _matches(cls, hit):
        """
        Tell whether ``hit`` belongs to this class.

        The base class never claims a hit, so this is always ``False``.
        """
        return False
Logger
类(基于Question
类):
class Logger(LoggerLogBase):
    """
    A class to represent a temperature logger.

    Loggers are the parent side of the ``logger_log`` join; their
    measurements are stored as child :class:`Log` documents.
    """

    name = Keyword()
    display_name = Keyword()
    is_displayed = Boolean()

    @classmethod
    def _matches(cls, hit):
        """
        Return whether a hit represents a logger (parent) document.

        The join value in ``_source`` can be either the bare relation name
        (``'logger'``) or the object form (``{'name': 'logger'}``).
        :meth:`save` writes the object form, so both must be accepted;
        comparing only against the bare string would never match documents
        written by this class.
        """
        relation = hit['_source']['logger_log']
        if isinstance(relation, dict):
            relation = relation.get('name')
        return relation == 'logger'

    @classmethod
    def search(cls, **kwargs):
        """
        Create an :class:`~elasticsearch_dsl.Search` instance over this
        index, filtered to documents whose join relation is ``'logger'``.
        """
        return cls._index.search(**kwargs).filter('term', logger_log='logger')

    def add_log(self, timestamp, heat_index_celsius, humidity, temperature_celsius):
        """
        Save a new log which was logged by this logger.

        :param timestamp: value stored in the log's ``timestamp`` field.
        :param heat_index_celsius: value stored in ``heat_index_celsius``.
        :param humidity: value stored in ``humidity``.
        :param temperature_celsius: value stored in ``temperature_celsius``.
        :return: the saved :class:`Log` instance.
        """
        log = Log(
            # Route the child by the parent's id.
            _routing=self.meta.id,
            # Link the child to this logger through the join field.
            logger_log={'name': 'log', 'parent': self.meta.id},
            timestamp=timestamp,
            heat_index_celsius=heat_index_celsius,
            humidity=humidity,
            temperature_celsius=temperature_celsius,
        )
        log.save()
        return log

    def search_logs(self):
        """
        Return the search for this logger's logs.

        The search is restricted to children of this logger via a
        ``parent_id`` filter and routed by this logger's id.
        """
        search = Log.search()
        search = search.filter('parent_id', type='log', id=self.meta.id)
        search = search.params(routing=self.meta.id)
        return search

    def search_latest_log(self):
        """
        Return the search for this logger's latest log.

        The search requests no regular hits (``size=0``); the newest log is
        exposed through the ``latest_log`` ``top_hits`` aggregation, sorted
        by ``timestamp`` descending and limited to one hit.
        """
        search = self.search_logs().params(size=0)
        # ``aggs`` is modified in place, so no reassignment is needed here.
        search.aggs.metric(
            'latest_log',
            'top_hits',
            sort=[{'timestamp': {'order': 'desc'}}],
            size=1,
        )
        return search

    def save(self, using=None, index=None, validate=True, **kwargs):
        """
        Save the document into elasticsearch.

        Marks the document as the ``'logger'`` side of the join before
        delegating to the parent implementation.
        See documentation for elasticsearch_dsl.Document.save for more
        information.
        """
        self.logger_log = {'name': 'logger'}
        # Forward by keyword so the call does not depend on the positional
        # order of the parent signature.
        return super().save(using=using, index=index, validate=validate, **kwargs)
Log
类(基于Answer
类):
class Log(LoggerLogBase):
    """
    One temperature measurement recorded by a logger.
    """

    timestamp = Date()
    heat_index_celsius = Float()
    humidity = Float()
    temperature_celsius = Float()

    @classmethod
    def _matches(cls, hit):
        """
        Tell whether ``hit`` is a log document.

        A hit qualifies when its ``logger_log`` source value is a dict whose
        ``name`` entry equals ``'log'``.
        """
        join_value = hit['_source']['logger_log']
        if not isinstance(join_value, dict):
            return False
        return join_value.get('name') == 'log'

    @classmethod
    def search(cls, using=None, **kwargs):
        """
        Build an :class:`~elasticsearch_dsl.Search` over this index that
        excludes documents whose join relation is ``'logger'``.
        """
        index_search = cls._index.search(using=using, **kwargs)
        return index_search.exclude('term', logger_log='logger')

    @property
    def logger(self):
        """
        The logger that produced this log.

        The logger is fetched on first access and then kept on ``self.meta``.
        """
        meta = self.meta
        if 'logger' not in meta:
            meta.logger = Logger.get(id=self.logger_log.parent, index=meta.index)
        return meta.logger

    def save(self, using=None, index=None, validate=True, **kwargs):
        """
        Store the document in elasticsearch, routed by its parent's id.
        See documentation for elasticsearch_dsl.Document.save for more information.
        """
        parent_id = self.logger_log.parent
        self.meta.routing = parent_id
        return super().save(using, index, validate, **kwargs)
我当前的解决方案是为每个记录器调用 logger.search_latest_log(),但这需要 N 个查询。我希望能够在单个查询中完成,以提高此操作的性能。
答案 0 :(得分:1)
我认为您需要的是将 children 聚合(Child Aggregation)与 top_hits 聚合结合使用:
POST logger-log/_search?size=0
{
  "aggs": {
    "top-loggers": {
      "terms": {
        "field": "name"
      },
      "aggs": {
        "to-logs": {
          "children": {
            "type": "log"
          },
          "aggs": {
            "top-logs": {
              "top_hits": {
                "size": 1,
                "sort": [
                  {
                    "timestamp": {
                      "order": "desc"
                    }
                  }
                ]
              }
            }
          }
        }
      }
    }
  }
}