Elasticsearch找不到hunspell词典

时间:2019-08-15 08:23:40

标签: docker elasticsearch hunspell

我正在从docker-compose.yml文件中运行elasticsearch:

# Compose stack with three services (MongoDB, Elasticsearch 6.5.4, Kibana 6.5.4)
# that all join the same user-defined bridge network.
version: "3"

services:
  # MongoDB; its /data/db is backed by the named volume cust-mycom-mongo.
  mongo:
    image: mongo
    container_name: mongo-cust-mycom
    ports:
      - "27017:27017"
    volumes:
      - "cust-mycom-mongo:/data/db"
    networks:
      - cust-mycom

  # Elasticsearch node with several X-Pack features switched off.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:6.5.4
    container_name: elasticsearch-cust-mycom
    ports:
      - "9200:9200"
      - "9300:9300"
    volumes:
      # Index data lives in a named volume.
      - "cust-mycom-elastic:/usr/share/elasticsearch/data"
      # Bind mount: the host directory (relative to this compose file) is
      # exposed inside the container as config/hunspell.
      - "./cust/config/elasticsearch/config/hunspell:/usr/share/elasticsearch/config/hunspell"
    networks:
      - cust-mycom
    environment:
      - cluster.name=i3-elasticsearch
      - xpack.security.enabled=false
      - xpack.monitoring.enabled=false
      - xpack.ml.enabled=false
      - xpack.graph.enabled=false
      - xpack.watcher.enabled=false
    restart: unless-stopped

  # Kibana, started after the elasticsearch service.
  kibana:
    image: docker.elastic.co/kibana/kibana:6.5.4
    container_name: kibana-cust-mycom
    ports:
      - "5601:5601"
    networks:
      - cust-mycom
    depends_on:
      - elasticsearch
    restart: unless-stopped

networks:
  cust-mycom:
    driver: bridge

# Named volumes declared with default settings.
volumes:
  cust-mycom-mongo:
  cust-mycom-elastic:

我使用docker-compose up -d启动这些容器。

当我尝试使用以下json创建所需的索引时:

{
    "settings": {
        "number_of_shards": 3,
        "number_of_replicas": 2,
        "analysis": {
            "filter": {
                "swedish_stemmer": {
                    "type": "hunspell",
                    "language": "sv_SE"
                },
                "ins_pattern": {
                    "type": "pattern_capture",
                    "patterns": [
                        "([a-zåäö]*)(prod)"
                    ]
                },
                "cust_stopwords": {
                    "type":       "stop",
                    "stopwords":  [ "en", "ett", "det", "den" ]
                }
            },
            "analyzer": {
                "swedish_index": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "ins_pattern",
                        "swedish_stemmer"
                    ]
                },
                "swedish_query": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "swedish_stemmer",
                        "cust_stopwords"
                    ]
                }
            }
        }
    },
    "mappings": {
        "default": {
            "properties": {
                "keywords": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "audience": {
                    "type": "keyword"
                },
                "contentExcerpt": {
                    "type": "text"
                },
                "date": {
                    "type": "date",
                    "store": true,
                    "format": "dateOptionalTime"
                },
                "validUntil": {
                    "type": "date",
                    "store": true,
                    "format": "dateOptionalTime"
                },
                "informationType": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "mainContentOfPage": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query",
                    "term_vector": "with_positions_offsets"
                },
                "thumbnailUrl": {
                    "type": "keyword",
                    "store": true,
                    "norms": false
                },
                "title": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "url": {
                    "type": "keyword",
                    "store": true,
                    "norms": false
                },
                "tags": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                }
            }
        }
    }
}

和以下脚本:

#!/bin/bash
# Recreate the "main" index from json/custse.index.json, then set its
# number_of_replicas to 0.

es_index_url=http://localhost:9200/main

# Remove the index.
curl -X DELETE "${es_index_url}"

# Create the index from the settings/mappings stored in the JSON file.
curl -X PUT -H "Content-type: application/json" -d @json/custse.index.json "${es_index_url}"

# Update the replica count on the freshly created index.
curl -X PUT "${es_index_url}/_settings" -H "Content-Type: application/json" -d '{
    "index" : {
        "number_of_replicas" : 0
    }
}'

我收到以下错误消息:

{"error":{"root_cause":[{"type":"illegal_state_exception","reason":"failed to load hunspell dictionary for locale: sv_SE"}]

我尝试将hunspell词典分别放入/usr/share/elasticsearch/config/hunspell、/usr/share/elasticsearch/hunspell、/etc/elasticsearch/hunspell和/etc/elasticsearch/config/hunspell中,但在任何一个位置都找不到它。

以下是hunspell目录的内容:

/etc/elasticsearch$ ls hunspell
sv_SE

/etc/elasticsearch$ ls hunspell/sv_SE/
cust.dic  README_sv_SE.txt  sv_SE.aff  sv_SE.dic

如何让Elasticsearch找到hunspell词典?

1 个答案:

答案 0 :(得分:1)

我确认此配置有效:

# Single-service stack: Elasticsearch 6.8.13 with a host directory mounted
# as the container's config/hunspell.
version: "3.4"

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:6.8.13
    container_name: elasticsearch6
    restart: always
    environment:
      # JVM heap flags handed to Elasticsearch through ES_JAVA_OPTS.
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
    ports:
      # Published on 127.0.0.1 only.
      - "127.0.0.1:9200:9200"
    volumes:
      # Index data lives in the named volume es_data.
      - "es_data:/usr/share/elasticsearch/data"
      # Bind mount: ./elasticsearch/hunspell on the host (relative to this
      # compose file) appears as config/hunspell inside the container.
      - "./elasticsearch/hunspell:/usr/share/elasticsearch/config/hunspell"

volumes:
  es_data: