Question

我为其中一个字段创建了一个自定义的 pattern 分析器。大多数情况下，它会为该字段生成 2 个词元（token）。但是，当我使用带有 AND 运算符或将 minimum_should_match 设为 100% 的 match 查询时，即使只匹配到其中 1 个词元，它也会返回该记录。

索引设置与映射：

{
  "settings": {
    "analysis": {
      "analyzer": {
        "test_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "test_pattern",
            "unique"
          ]
        }
      },
      "filter": {
        "test_pattern": {
          "type": "pattern_capture",
          "preserve_original": 0,
          "patterns": [
            ".*###(\\d*)###(.*###.*###.*)",
            ".*###(.*###.*###.*)"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc_type": {
      "properties": {
        "test_value": {
          "type": "text",
          "analyzer": "test_analyzer"
        }
      }
    }
  }
}

测试文档：

{
  "test_value": "abc###def###12345###jkl###mno###pqr"
}

{
  "test_value": "abc###def###12378###jkl###mno###pqr"
}

查询：

{
  "query": {
    "match": {
      "test_value": {
        "query": "abc###def###12345###jkl###mno###pqr",
        "operator": "AND"
      }
    }
  }
}

以上查询返回了两条记录（响应如下）。

我也试着通过 explain 输出来理解这个结果，但不明白为什么其中会出现 Synonym（同义词）查询。能帮我看看是哪里出了问题吗？

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.46029136,
    "hits": [
      {
        "_shard": "[test_stack][1]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiO2DN2C8SdyE0d6K",
        "_score": 0.46029136,
        "_source": {
          "test_value": "abc###def###12345###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.46029136,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.46029136,
              "description": "score(doc=0,freq=2.0 = termFreq=2.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.6,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "termFreq=2.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[test_stack][4]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiQfJN2C8SdyE0d6L",
        "_score": 0.36165747,
        "_source": {
          "test_value": "abc###def###12378###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.3616575,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.3616575,
              "description": "score(doc=0,freq=1.0 = termFreq=1.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.2571429,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "termFreq=1.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}

在使用自定义分析器的字段上，match 查询的 AND 运算符或 minimum_should_match 不起作用

0 个答案: