使用Elastic Highlighting API突出显示“最佳匹配”

时间:2019-02-15 05:11:19

标签: elasticsearch highlight fuzzy-search

是否可以使用 Elastic Highlighting API 只突出显示“最佳匹配”?这里的“最佳匹配”是指:只高亮单词中与查询完全匹配的那一部分,即使整个单词都被查询命中。例如:

  • 文档内容为Dubai
  • 搜索查询为duba
  • 期望的结果是<b>Duba</b>i

但问题出在我的查询上:该查询包含一些带有“模糊性”(fuzziness)的子查询。

这是示例索引配置:

PUT /highlight_best_match
{
  "settings": {
    "number_of_shards": "1",
    "number_of_replicas": "1",
    "analysis": {
      "filter": {
        "language_stemmer": {
          "name": "german2",
          "type": "stemmer"
        },
        "language_stopwords": {
          "type": "stop",
          "stopwords": "_german_"
        }
      },
      "char_filter": {
        "ampersand_to_and": {
          "type": "mapping",
          "mappings": [
            "&=> and "
          ]
        }
      },
      "analyzer": {
        "prefix_analyzer": {
          "type": "custom",
          "tokenizer": "edge_ngram_tokenizer",
          "filter": [
            "german_normalization",
            "lowercase"
          ]
        },
        "match_analyzer": {
          "char_filter": [
            "html_strip",
            "ampersand_to_and"
          ],
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "language_stopwords",
            "language_stemmer"
          ]
        },
        "search_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "german_normalization",
            "lowercase"
          ]
        }
      },
      "tokenizer": {
        "edge_ngram_tokenizer": {
          "type": "edge_ngram",
          "min_gram": "2",
          "max_gram": "20",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "default": {
      "dynamic": "false",
      "properties": {
        "id": {
          "type": "integer"
        },
        "title": {
          "type": "keyword",
          "fields": {
            "match": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "index_options": "offsets",
              "analyzer": "match_analyzer"
            },
            "prefix": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "index_options": "offsets",
              "analyzer": "prefix_analyzer",
              "search_analyzer": "search_analyzer"
            }
          }
        }
      }
    }
  }
}

以及一些用于说明该示例的数据:

POST /_bulk
{"create": {"_id": "1", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai"}
{"create": {"_id": "2", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dumai"}
{"create": {"_id": "3", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Cuba"}
{"create": {"_id": "4", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Kuba Südküste"}
{"create": {"_id": "5", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai Kreuzfahrt"}

查询是

GET /highlight_best_match/_search
{
  "query": {
    "bool": {
      "must": {
        "bool": {
          "should": [
            {
              "match": {
                "title.prefix": {
                  "query": "duba",
                  "fuzziness": 1,
                  "boost": 1
                }
              }
            },
            {
              "match": {
                "title.match": {
                  "query": "duba",
                  "fuzziness": 1,
                  "boost": 1
                }
              }
            }
          ]
        }
      },
      "should": [
        {
          "match_phrase_prefix": {
            "title.match": {
              "query": "duba",
              "boost": 5
            }
          }
        },
        {
          "match": {
            "title.prefix": {
              "query": "duba",
              "fuzziness": 0,
              "boost": 3
            }
          }
        },
        {
          "match": {
            "title.match": {
              "query": "duba",
              "fuzziness": 0,
              "boost": 10
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "encoder": "plain",
    "order": "score",
    "pre_tags": [
      "<b>"
    ],
    "post_tags": [
      "</b>"
    ],
    "fields": {
      "title.prefix": {
        "type": "fvh",
        "matched_fields": [
          "title.match",
          "title.prefix"
        ]
      }
    }
  }
}

结果是

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 11.402948,
    "hits": [
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "1",
        "_score": 11.402948,
        "_source": {
          "title": "Dubai"
        },
        "highlight": {
          "title.prefix": [
            "<b>Dubai</b>"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "5",
        "_score": 6.812179,
        "_source": {
          "title": "Dubai Kreuzfahrt"
        },
        "highlight": {
          "title.prefix": [
            "<b>Dubai</b> Kreuzfahrt"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "3",
        "_score": 1.5331156,
        "_source": {
          "title": "Cuba"
        },
        "highlight": {
          "title.prefix": [
            "<b>Cuba</b>"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "4",
        "_score": 1.0343978,
        "_source": {
          "title": "Kuba Südküste"
        },
        "highlight": {
          "title.prefix": [
            "<b>Kuba</b> Südküste"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "2",
        "_score": 0.7896109,
        "_source": {
          "title": "Dumai"
        },
        "highlight": {
          "title.prefix": [
            "<b>Duma</b>i"
          ]
        }
      }
    ]
  }
}

请查看 ID = 1 和 ID = 5 的结果。是否可以只高亮显示与 duba 匹配的部分(即 <b>Duba</b>i),就像 ID = 2 的结果 <b>Duma</b>i 那样?

我知道可以把 highlight_query 设置为只包含精确匹配的查询。但我希望无论如何都能高亮显示结果(包括模糊匹配的结果),只是在存在完全匹配时优先高亮完全匹配的部分。

提前谢谢!

0 个答案:

没有答案