我有一个双节点的 Elasticsearch 集群,同一个搜索查询在不同节点上返回的结果不同,我想找出原因。详细信息如下:
_cluster/state:
{
"cluster_name": "elasticsearch.abc",
"version": 330,
"master_node": "HexGKOoHSxqRaMmwduCVIA",
"blocks": {},
"nodes": {
"rUZDrUfMR1-RWcy4t0YQNw": {
"name": "Owl",
"transport_address": "inet[/10.123.123.123:9303]",
"attributes": {}
},
"HexGKOoHSxqRaMmwduCVIA": {
"name": "Bloodlust II",
"transport_address": "inet[/10.123.123.124:9303]",
"attributes": {}
}
},
"metadata": {
"templates": {},
"indices": {
"abc": {
"state": "open",
"settings": {
"index": {
"creation_date": "1432297566361",
"uuid": "LKx6Ro9CRXq6JZ9a29jWeA",
"analysis": {
"filter": {
"substring": {
"type": "nGram",
"min_gram": "1",
"max_gram": "50"
}
},
"analyzer": {
"str_index_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "keyword"
},
"str_search_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"number_of_shards": "5",
"version": {
"created": "1050099"
}
}
},
"mappings": {
"some_mapping": {
...
}
...
},
"aliases": []
}
}
},
"routing_table": {
"indices": {
"abc": {
"shards": {
"0": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 0,
"index": "abc"
}
],
"1": [
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 1,
"index": "abc"
}
],
"2": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 2,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
],
"3": [
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 3,
"index": "abc"
}
],
"4": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 4,
"index": "abc"
}
]
}
}
}
},
"routing_nodes": {
"unassigned": [],
"nodes": {
"HexGKOoHSxqRaMmwduCVIA": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
],
"rUZDrUfMR1-RWcy4t0YQNw": [
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
]
}
},
"allocations": []
}
_cluster/health:
{
"cluster_name": "elasticsearch.abc",
"status": "green",
"timed_out": false,
"number_of_nodes": 2,
"number_of_data_nodes": 2,
"active_primary_shards": 5,
"active_shards": 10,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"number_of_pending_tasks": 0
}
_cluster/stats:
{
"timestamp": 1432312770877,
"cluster_name": "elasticsearch.abc",
"status": "green",
"indices": {
"count": 1,
"shards": {
"total": 10,
"primaries": 5,
"replication": 1,
"index": {
"shards": {
"min": 10,
"max": 10,
"avg": 10
},
"primaries": {
"min": 5,
"max": 5,
"avg": 5
},
"replication": {
"min": 1,
"max": 1,
"avg": 1
}
}
},
"docs": {
"count": 19965,
"deleted": 4
},
"store": {
"size_in_bytes": 399318082,
"throttle_time_in_millis": 0
},
"fielddata": {
"memory_size_in_bytes": 60772,
"evictions": 0
},
"filter_cache": {
"memory_size_in_bytes": 15284,
"evictions": 0
},
"id_cache": {
"memory_size_in_bytes": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 68,
"memory_in_bytes": 10079288,
"index_writer_memory_in_bytes": 0,
"index_writer_max_memory_in_bytes": 5120000,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0
},
"percolate": {
"total": 0,
"time_in_millis": 0,
"current": 0,
"memory_size_in_bytes": -1,
"memory_size": "-1b",
"queries": 0
}
},
"nodes": {
"count": {
"total": 2,
"master_only": 0,
"data_only": 0,
"master_data": 2,
"client": 0
},
"versions": [
"1.5.0"
],
"os": {
"available_processors": 8,
"mem": {
"total_in_bytes": 0
},
"cpu": []
},
"process": {
"cpu": {
"percent": 0
},
"open_file_descriptors": {
"min": 649,
"max": 654,
"avg": 651
}
},
"jvm": {
"max_uptime_in_millis": 2718272183,
"versions": [
{
"version": "1.7.0_40",
"vm_name": "Java HotSpot(TM) 64-Bit Server VM",
"vm_version": "24.0-b56",
"vm_vendor": "Oracle Corporation",
"count": 2
}
],
"mem": {
"heap_used_in_bytes": 2665186528,
"heap_max_in_bytes": 4060086272
},
"threads": 670
},
"fs": {
"total_in_bytes": 631353901056,
"free_in_bytes": 209591468032,
"available_in_bytes": 209591468032
},
"plugins": []
}
}
示例查询:
/_search?from=22&size=1
{
"query": {
"bool": {
"should": [{
"match": {
"address.city": {
"query": "Bremen",
"boost": 2
}
}
}],
"must": [{
"match": {
"type": "L"
}
}]
}
}
}
第一次请求的回复
{
"took": 30,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 19543,
"max_score": 6.407021,
"hits": [{
"_index": "abc",
"_type": "xyz",
"_id": "ABC123",
"_score": 5.8341036,
"_source": {
...
}
}]
}
}
第二次请求的回复
{
"took": 27,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 19543,
"max_score": 6.407021,
"hits": [
{
"_index": "abc",
"_type": "xyz",
"_id": "FGH12343",
"_score": 5.8341036,
"_source": {
...
}
}
]
}
}
原因可能是什么?如何确保不同节点的结果相同?
按要求补充带 explain 的查询:search /abc/mytype/_search?from=0&size=1&search_type=dfs_query_then_fetch&explain=
{
"query": {
"bool": {
"should": [{
"match": {
"address.city": {
"query": "Karlsruhe",
"boost": 2
}
}
}]
}
}
}
第一次请求的回复
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 41,
"max_score": 7.211497,
"hits": [
{
"_shard": 0,
"_node": "rUZDrUfMR1-RWcy4t0YQNw",
"_index": "abc",
"_type": "mytype",
"_id": "abc123",
"_score": 7.211497,
"_source": {...
},
"_explanation": {
"value": 7.211497,
"description": "weight(address.city:karlsruhe^2.0 in 1598) [PerFieldSimilarity], result of:",
"details": [
{
"value": 7.211497,
"description": "fieldWeight in 1598, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0"
}
]
},
{
"value": 7.211497,
"description": "idf(docFreq=46, maxDocs=23427)"
},
{
"value": 1,
"description": "fieldNorm(doc=1598)"
}
]
}
]
}
}
]
}
}
第二次请求的回复
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 41,
"max_score": 7.194322,
"hits": [
{
"_shard": 0,
"_node": "rUZDrUfMR1-RWcy4t0YQNw",
"_index": "abc",
"_type": "mytype",
"_id": "abc123",
"_score": 7.194322,
"_source": {...
},
"_explanation": {
"value": 7.194322,
"description": "weight(address.city:karlsruhe^2.0 in 1598) [PerFieldSimilarity], result of:",
"details": [
{
"value": 7.194322,
"description": "fieldWeight in 1598, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0"
}
]
},
{
"value": 7.194322,
"description": "idf(docFreq=48, maxDocs=24008)"
},
{
"value": 1,
"description": "fieldNorm(doc=1598)"
}
]
}
]
}
}
]
}
}
答案 0 :(得分:7)
命中结果不一致,很可能是因为主分片和副本分片之间不同步。如果某个节点(无论出于何种原因)离开了集群,而在此期间您仍继续对文档进行更改(索引、删除、更新),就会发生这种情况。
评分部分则是另一回事,可以用 this blog post 中“相关性评分”(Relevancy Scoring)一节来解释:
当您执行搜索时,Elasticsearch 面临一个有趣的困境。您的查询需要查找所有相关文档……但这些文档分散在集群中任意数量的分片上。每个分片本质上都是一个 Lucene 索引,各自维护自己的 TF 和 DF 统计信息。一个分片只知道“pineapple”(菠萝)在该分片内出现了多少次,而不知道它在整个集群中出现的次数。
搜索时,我会尝试使用“DFS Query Then Fetch”搜索类型,即 _search?search_type=dfs_query_then_fetch ……
这应该有助于提高评分的准确性。
此外,节点断开期间的文档更改会造成文档计数不一致,即使在删除并重建索引之后,这种不一致仍会影响评分计算。这可能是因为副本分片和主分片上的文档变更情况不同,更具体地说,是其中有文档被删除了。被删除的文档只有在段合并时才会从索引中永久移除;而除非底层 Lucene 实例满足某些条件,否则段合并不会发生。
可以通过 POST /_optimize?max_num_segments=1 启动强制合并。
警告:这需要很长时间(取决于索引的大小),并且会消耗大量 IO 和 CPU 资源,不应在仍有更改写入的索引上运行。文档:Optimize、Segments Merging