The following settings work for us, but we think we could get better results if we preserved special characters. Can anyone suggest what changes to the settings below are needed in order to preserve special characters?
We would like to preserve the following special characters:
+ - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
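Most of these are reserved Lucene query-string syntax characters, and the standard tokenizer also strips them at index time. As a quick illustration (a sketch, assuming a local node and the older 1.x/2.x query-string form of the _analyze API), you can compare what the standard and whitespace tokenizers emit for a string containing such characters:

# hypothetical check: the standard tokenizer drops the symbols,
# while the whitespace tokenizer keeps them
curl -XGET "localhost:9200/_analyze?tokenizer=standard" -d 'C++ && .NET == great!'
curl -XGET "localhost:9200/_analyze?tokenizer=whitespace" -d 'C++ && .NET == great!'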
settingsSimple() {
curl -XPUT "$es_host:9200/$INDEX/" --data-binary @- <<EOF
{
"settings": {
"index": {
"number_of_shards": $(($(curl -s $es_hostname:9200/_cat/nodes | wc -
l)*2)),
"number_of_replicas": 0,
"refresh_interval": "-1"
},
"analysis": {
"char_filter": {
"chr_decoder": {
"type": "mapping",
"mappings": [
"2=>tu",
"@=>a",
"$=>s",
".=>-",
"’=> ",
":=> ",
"!=> "
],
"preserve_original": true
},
"chr_decoder2": {
"type": "mapping",
"mappings": [
"2=>tu",
"@=>a",
"$=>s"
],
"preserve_original": true
},
"dash_remover": {
"type": "pattern_replace",
"pattern": "-",
"replacement": ""
}
},
"analyzer": {
"all_analyzer": {
"char_filter": [
"chr_decoder",
"dash_remover"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"ascii_folding",
"stop_filter"
]
},
"name_analyzer": {
"char_filter": [
"chr_decoder2"
],
"tokenizer": "ngram_tokenizer",
"filter": [
"lowercase",
"asciifolding"
]
},
"keep_analyzer": {
"char_filter": [
"chr_decoder2"
],
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"filter": {
"ascii_folding": {
"type": "asciifolding",
"preserve_original": false
},
"stop_filter": {
"type": "stop",
"stopwords": [
"and",
"the"
]
},
"shingle_filter": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3,
"output_unigrams": true,
"token_separator": " ",
"filler_token": " "
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "2",
"max_gram": "20",
"token_chars": [
"letter",
"digit"
],
"preserve_original": true
}
}
}
}
}
EOF
}
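Once the index exists, it can help to check what each custom analyzer actually produces for a string with special characters (a sketch, assuming $es_host and $INDEX are set as in settingsSimple and that the cluster accepts the older query-string form of the _analyze API):

# hypothetical verification of the custom analyzers defined above
curl -XGET "$es_host:9200/$INDEX/_analyze?analyzer=all_analyzer" -d '!Mayday! && C++'
curl -XGET "$es_host:9200/$INDEX/_analyze?analyzer=keep_analyzer" -d '!Mayday! && C++'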
The query I use is as follows:
{
"from" : 0,
"size" : 10,
"query" : {
"function_score" : {
"query" : {
"bool" : {
"should" : [ {
"query_string" : {
"query" : "!Mayday!",
"fields" : [ "enm^8", "pnm^12", "upnm^31", "_all^10" ],
"default_operator" : "and",
"analyzer" : "all_analyzer"
}
}, {
"query_string" : {
"query" : "!Mayday!",
"fields" : [ "_all^8", "enm^8", "pnm^12", "upnm^31" ],
"default_operator" : "and",
"analyzer" : "keep_analyzer"
}
} ]
}
},
"filter" : {
"terms" : {
"sml" : [ "0" ]
}
},
"functions" : [ {
"field_value_factor" : {
"field" : "pop",
"factor" : 6.0,
"missing" : 0.0,
"modifier" : "none"
}
} ],
"boost_mode" : "sum"
}
},
"fields" : [ "uid", "enm", "pid", "upid", "pnm", "rd", "upnm" ]
}
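Note that even if the analyzer keeps these characters, query_string treats most of them (+ - && || ! ( ) and so on) as operators at search time, so they need to be escaped with a backslash (doubled inside JSON), or the clause can be switched to a match query, which does not parse operator syntax. A hedged sketch of one escaped clause, reusing the same fields and boosts as above:

{
  "query_string" : {
    "query" : "\\!Mayday\\!",
    "fields" : [ "enm^8", "pnm^12", "upnm^31" ],
    "default_operator" : "and",
    "analyzer" : "keep_analyzer"
  }
}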
Answer (score: 0)
I had the same problem. Because I am using the "whitespace" tokenizer, it keeps all special characters.
I created the index template in the following way. After that, all inputs, aggregations, and queries display my values correctly.
PUT _template/tracking_all
{
"template": "tracking*",
"version": 1,
"settings": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"default": {
"type": "custom",
"filter": [ "lowercase" ],
"tokenizer": "whitespace"
}
}
}
}
}
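With this template in place, a quick way to confirm that the whitespace-based default analyzer keeps the symbols is to run _analyze against an index that matches the pattern (a sketch; tracking-2016 is just a placeholder index name, and when no analyzer is specified the index's default analyzer is used):

curl -XGET "localhost:9200/tracking-2016/_analyze" -d 'foo=bar && v1.2!'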