我为其中一个字段创建了一个基于 pattern 的自定义分析器。大多数情况下,它会生成 2 个词元(token)。但是,当我使用带有 AND 运算符、或将 minimum_should_match 设为 100% 的 match 查询时,即使只匹配到 1 个词元,也会返回该记录。
索引设置与映射:
{
"settings": {
"analysis": {
"analyzer": {
"test_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"test_pattern",
"unique"
]
}
},
"filter": {
"test_pattern": {
"type": "pattern_capture",
"preserve_original": 0,
"patterns": [
".*###(\\d*)###(.*###.*###.*)",
".*###(.*###.*###.*)"
]
}
}
}
},
"mappings": {
"doc_type": {
"properties": {
"test_value": {
"type": "text",
"analyzer": "test_analyzer"
}
}
}
}
}
测试文档:
{
"test_value": "abc###def###12345###jkl###mno###pqr"
}
{
"test_value": "abc###def###12378###jkl###mno###pqr"
}
查询:
{
"query": {
"match": {
"test_value": {
"query": "abc###def###12345###jkl###mno###pqr",
"operator": "AND"
}
}
}
}
上面的查询返回了两条记录。
我也尝试去理解结果中的 explain 输出(如下所示),但不明白为什么其中会出现 Synonym(同义词)。你能帮我看看哪里出了问题吗?
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.46029136,
"hits": [
{
"_shard": "[test_stack][1]",
"_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
"_index": "test_stack",
"_type": "doc_type",
"_id": "AWkPiO2DN2C8SdyE0d6K",
"_score": 0.46029136,
"_source": {
"test_value": "abc###def###12345###jkl###mno###pqr"
},
"_explanation": {
"value": 0.46029136,
"description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.46029136,
"description": "score(doc=0,freq=2.0 = termFreq=2.0 ), product of:",
"details": [
{
"value": 0.2876821,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 1,
"description": "docFreq",
"details": []
},
{
"value": 1,
"description": "docCount",
"details": []
}
]
},
{
"value": 1.6,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 2,
"description": "termFreq=2.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2,
"description": "avgFieldLength",
"details": []
},
{
"value": 1,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
}
},
{
"_shard": "[test_stack][4]",
"_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
"_index": "test_stack",
"_type": "doc_type",
"_id": "AWkPiQfJN2C8SdyE0d6L",
"_score": 0.36165747,
"_source": {
"test_value": "abc###def###12378###jkl###mno###pqr"
},
"_explanation": {
"value": 0.3616575,
"description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.3616575,
"description": "score(doc=0,freq=1.0 = termFreq=1.0 ), product of:",
"details": [
{
"value": 0.2876821,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 1,
"description": "docFreq",
"details": []
},
{
"value": 1,
"description": "docCount",
"details": []
}
]
},
{
"value": 1.2571429,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2,
"description": "avgFieldLength",
"details": []
},
{
"value": 1,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
}
}
]
}
}