在 Elasticsearch 6.5 上进行语音(phonetic)搜索和模糊(fuzzy)搜索

时间:2018-12-26 16:17:48

标签: python-3.x elasticsearch fuzzy-search elasticsearch-phonetic

我已经将一些 CSV 文件索引到 Elasticsearch 中,现在想对其中的某些列(基本上是两列)进行搜索。我想实现:

- 精确搜索
- 语音(phonetic)搜索
- 模糊(fuzzy)搜索
- 同时进行语音搜索和模糊搜索
- 如果可能的话,在一次查询中同时完成以上所有搜索

下面是我使用的代码(Elasticsearch 6.5.1,Python 3.7):

import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connection settings: a node on localhost and the index that is searched.
ELASTICSEARCH_HOST = "localhost"
ELASTICSEARCH_PORT = 9200
ELASTICSEARCH_INDEX_NAME = "phonetic_index"

# Client configured with the single host/port pair defined above.
es = Elasticsearch(
    [
        {
            "host": ELASTICSEARCH_HOST,
            "port": ELASTICSEARCH_PORT,
        },
    ],
)


def printSearchResult(caption, res):
    """Print a caption followed by one "<score> <text>" line per hit.

    Args:
        caption: Heading printed before the hits.
        res: Search response mapping. Hits are read from
            ``res['hits']['hits']``; each hit must provide ``_score`` and
            ``_source['text']``.
    """
    print(caption)
    for entry in res['hits']['hits']:
        score = entry['_score']
        text = entry['_source']['text']
        # Score is rendered with two decimals, then the document text.
        print("%0.2f %s" % (score, text))
    # print() adds its own newline, so this writes two newline characters.
    print("\n")


# Search body intended to require a phonetic match on "text.phonetic" and to
# add an optional fuzzy match on "text".
request_body = {
  "query": {
    "bool": {
      "must": [
        {
          # NOTE(review): this nested "query" key sits where a query clause
          # such as "match" is expected. It corresponds to the reported
          # parsing_exception 'no [query] registered for [query]'.
          "query": {
            "match": {
               "text.phonetic": {
                "query": "Meier"
              }
            }
          }
        }],
     # Fuzzy clause on the plain "text" field, fuzziness set to 2.
     "should" : [
        {
          "fuzzy": {
            "text": {
              "value": "Meier",
              "fuzziness": 2
            }
          }
        }
      ]
    }
  },
  # Requested number of hits.
  "size" : 15
}


# Run the combined phonetic + fuzzy query against the configured index.
# The body passed is request_body, the name this snippet actually defines.
result_phonetic_and_fuzzy2 = es.search(
    index=ELASTICSEARCH_INDEX_NAME,
    body=request_body,
)

# The caption is built from adjacent string literals: a plain quoted string
# cannot contain a physical line break, so the long text is split this way.
printSearchResult(
    "Result of search using phonetic search combined "
    "with fuzzy search with fuzziness 2:",
    result_phonetic_and_fuzzy2,
)

尝试代码时,出现以下错误

-------------------------------------------------------------------- 
-------
RequestError                              Traceback (most recent 
call last)
<ipython-input-35-3790ca54c72b> in <module>()
    138 
    139 
--> 140 result_phonetic_and_fuzzy2 = es.search(index = 
 ELASTICSEARCH_INDEX_NAME, body = request_body2)
    141 
    142 printSearchResult("Result of search using phonetic search 
combined with fuzzy search with fuzziness 2:", 
result_phonetic_and_fuzzy2)

~/anaconda3/lib/python3.7/site- 
packages/elasticsearch/client/utils.py in _wrapped(*args, **kwargs)
     74                 if p in kwargs:
     75                     params[p] = kwargs.pop(p)
---> 76             return func(*args, params=params, **kwargs)
     77         return _wrapped
     78     return _wrapper

~/anaconda3/lib/python3.7/site- 
packages/elasticsearch/client/__init__.py in search(self, index, 
doc_type, body, params)
    658             index = '_all'
    659         return self.transport.perform_request('GET', 
_make_path(index,
--> 660             doc_type, '_search'), params=params, body=body)
    661 
    662     @query_params('_source', '_source_exclude', 
'_source_include',

~/anaconda3/lib/python3.7/site-packages/elasticsearch/transport.py 
in perform_request(self, method, url, headers, params, body)
    316                 delay = 2**attempt - 1
    317                 time.sleep(delay)
--> 318                 status, headers_response, data = 
connection.perform_request(method, url, params, body, 
headers=headers, ignore=ignore, timeout=timeout)
    319 
    320             except TransportError as e:

~/anaconda3/lib/python3.7/site- 
packages/elasticsearch/connection/http_urllib3.py in 
perform_request(self, method, url, params, body, timeout, ignore, 
headers)
    184         if not (200 <= response.status < 300) and 
response.status not in ignore:
    185             self.log_request_fail(method, full_url, url, 
body, duration, response.status, raw_data)
--> 186             self._raise_error(response.status, raw_data)
    187 
    188         self.log_request_success(method, full_url, url, 
body, response.status,

~/anaconda3/lib/python3.7/site- 
packages/elasticsearch/connection/base.py in _raise_error(self, 
status_code, raw_data)
    123             logger.warning('Undecodable raw error response 
from server: %s', err)
    124 
--> 125         raise HTTP_EXCEPTIONS.get(status_code, 
TransportError)(status_code, error_message, additional_info)
    126 
    127 

RequestError: RequestError(400, 'parsing_exception', 'no [query] 
registered for [query]')

有人可以帮助我吗?

谢谢

1 个答案:

答案 0 :(得分:0)

我这边无法实际测试,但非常确定您必须删除嵌套在 must 子句中的第二个 "query"(bool 查询的 must 数组里应当直接放 match 等查询子句)。请告诉我这样是否可行。

# Search body with the "match" clause placed directly inside "must".
request_body = {
    "query": {
        "bool": {
            # Required clause: match on the "text.phonetic" field.
            "must": [
                {"match": {"text.phonetic": {"query": "Meier"}}},
            ],
            # Additional fuzzy clause on the plain "text" field.
            "should": [
                {"fuzzy": {"text": {"value": "Meier", "fuzziness": 2}}},
            ],
        },
    },
    # Requested number of hits.
    "size": 15,
}