Question

我有一个带有主题标签的标签，例如 #teamIndia。现在，当单击此主题标签时，应该获取所有包含 "#teamIndia" 的结果：首先显示包含 "#teamIndia" 的结果，然后是包含 "teamIndia" 的结果，然后是 "team India"，然后是 "team" 或 "India"，依此类推。

我在做什么：

搜索文字： "#groupIndia"，"#NEWYORK"，"#profession"，"#2016"

POST /clip
{
    "settings": {
        "analysis": {
            "char_filter" : {
                "space_hashtags" : {
                    "type" : "mapping",
                    "mappings" : ["#=>|#"]
                }
            },
            "filter": {
                "substring": {
                    "max_gram": "20",
                    "type": "nGram",
                    "min_gram": "1",
                    "token_chars": [
                        "whitespace"
                    ]
                },
                "camelcase": {
                    "type": "word_delimiter",
                    "type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
                },
                "stopword": {
                    "type":       "stop",
                    "stopwords": ["and", "is", "the"]
                }
            },
            "analyzer": {
                "substring_analyzer": {
                    "filter": [
                        "lowercase",
                        "substring"
                    ],
                    "tokenizer": "standard"
                },
                "camelcase_analyzer": {
                    "type" : "custom",
                    "char_filter" : "space_hashtags",
                    "tokenizer" : "whitespace",
                    "filter": [
                        "camelcase",
                        "lowercase",
                        "stopword"
                    ]
                }
            }
        }
    },
    "mappings": {
        "Clip": {
            "properties": {
                "description": {
                    "type": "multi_field",
                    "fields": {
                        "description": {
                            "type": "string",
                            "analyzer": "substring_analyzer",
                            "search_analyzer": "standard"
                        },
                        "raw": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "hashtag": {
                            "type": "string",
                            "index": "analyzed",
                            "analyzer": "camelcase_analyzer"
                        }
                    }
                },
                ....
            }
        }
    }
}

文档示例： -

POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}

POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}

POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife!    &lt;a href=&quot;search/clips?q=%23theProvider&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #theProvider &lt;/a&gt;     rioOlympic   &lt;a href=&quot;search/clips?q=%23DUBAI&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #DUBAI &lt;/a&gt;    &lt;a href=&quot;search/clips?q=%23TheBestAndTheBeast&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #TheBestAndTheBeast &lt;/a&gt;   &lt;a href=&quot;search/clips?q=%23rioOlympic&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #rioOlympic &lt;/a&gt;"}

**搜索查询**

GET clip/_search
{
"size": 100,
"query": {
    "filtered": {
        "query": {
            "bool": {
                "must":
                   {
                    "query_string": {
                        "fields": [
                           "description.hashtag"
                        ],
                        "query": "teamIndia"
                    }                         
                },
                "should": { 
                    "match": 
                        { "description.raw": "#teamIndia"}   
                }
            }
        }
    }
}

}

预期结果： "#teamIndia"，"teamIndia"，"team India"，"team"，"India"，

和其他测试关键字类似。

Answer 1

原始帖子中的查询无法按预期运行的原因之一是 description.raw 是 not_analyzed 的。因此，查询 #teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档：description.raw 中保存的是未经分析的整个词项 Animals and Pets and #teamIndia，而不是 #teamIndia。

假设您拥有的文档类似于OP中的第二个示例。

示例：

{"id" : 2, "description" : "Animals and Pets and #teamIndia"}

OR

{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}

然后您应该能够按以下顺序对文档进行排名：

1）描述包含“#teamIndia”
2）描述包含“teamIndia”
3）描述包含“team India”
4）描述包含“team”或“India”

在 word_delimiter 过滤器中启用 preserve_original 和 catenate_all，如下例所示

示例：

索引文档

PUT clip { "settings": { "analysis": { "char_filter": { "zwsp_normalize": { "type": "mapping", "mappings": [ "\\u200B=>", "\\u200C=>", "\\u200D=>" ] }, "html_decoder": { "type": "mapping", "mappings": [ "&lt;=> <", "&gt;=> >" ] } }, "filter": { "camelcase": { "type": "word_delimiter", "preserve_original": "true", "catenate_all": "true" }, "stopword": { "type": "stop", "stopwords": [ "and", "is", "the" ] } }, "analyzer": { "camelcase_analyzer": { "type": "custom", "tokenizer": "whitespace", "filter": [ "camelcase", "lowercase", "stopword" ], "char_filter": [ "zwsp_normalize", "html_decoder", "html_strip" ] } } } }, "mappings": { "Clip": { "properties": { "description": { "type": "multi_field", "fields": { "hashtag": { "type": "string", "index": "analyzed", "analyzer": "camelcase_analyzer", "norms": { "enabled": false } } } } } } } } POST /clip/Clip/1 { "id": 1, "description": "Animals and Pets and #teamIndia" } POST /clip/Clip/2 { "id": 2, "description": "Animals and Pets and teamIndia" } POST /clip/Clip/3 { "id": 3, "description": "Animals and Pets and team India" } POST /clip/Clip/4 { "id": 4, "description": "Animals and Pets and India" } POST /clip/Clip/7 { "id": 7, "description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;" }

查询结果：

POST clip/_search?search_type=dfs_query_then_fetch { "size": 100, "query": { "filtered": { "query": { "bool": { "must": [ { "query_string": { "fields": [ "description.hashtag" ], "query": "#teamIndia" } } ] } } } } }

结果：

"hits": { "total": 5, "max_score": 1.4969246, "hits": [ { "_index": "clip", "_type": "Clip", "_id": "7", "_score": 1.4969246, "_source": { "id": 7, "description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;" } }, { "_index": "clip", "_type": "Clip", "_id": "1", "_score": 1.4969246, "_source": { "id": 1, "description": "Animals and Pets and #teamIndia" } }, { "_index": "clip", "_type": "Clip", "_id": "2", "_score": 1.0952718, "_source": { "id": 2, "description": "Animals and Pets and teamIndia" } }, { "_index": "clip", "_type": "Clip", "_id": "3", "_score": 0.5207714, "_source": { "id": 3, "description": "Animals and Pets and team India" } }, { "_index": "clip", "_type": "Clip", "_id": "4", "_score": 0.11123338, "_source": { "id": 4, "description": "Animals and Pets and India" } } ] }

示例#dubai：

POST /clip/Clip/5 { "id": 5, "description": "#dubai is hot" } POST /clip/Clip/6 { "id": 6, "description": "dubai airport is huge" } POST clip/_search?search_type=dfs_query_then_fetch { "size": 100, "query": { "filtered": { "query": { "bool": { "must": [ { "query_string": { "fields": [ "description.hashtag" ], "query": "#dubai" } } ] } } } } } "hits": { "total": 2, "max_score": 1.820827, "hits": [ { "_index": "clip", "_type": "Clip", "_id": "5", "_score": 1.820827, "_source": { "id": 5, "description": "#dubai is hot" } }, { "_index": "clip", "_type": "Clip", "_id": "6", "_score": 0.5856731, "_source": { "id": 6, "description": "dubai airport is huge" } } ] }

示例#professionalAndPunctual：

POST /clip/Clip/7 { "id": 7, "description": "professionalAndPunctual" } POST clip/_search?search_type=dfs_query_then_fetch { "size": 100, "query": { "filtered": { "query": { "bool": { "must": [ { "query_string": { "fields": [ "description.hashtag" ], "query": "#professionalAndPunctual" } } ] } } } } } "hits": [ { "_index": "clip", "_type": "Clip", "_id": "7", "_score": 2.2149992, "_source": { "id": 7, "description": "professionalAndPunctual" } } ]

已编辑的示例

示例：#TheBestAndTheBeast

POST /clip/Clip/10 {"id" : 10, "description" : "TheBestAndTheBeast"} POST /clip/Clip/11 {"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"} POST /clip/Clip/12 {"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"} POST clip/_search?search_type=dfs_query_then_fetch { "size": 100, "query": { "filtered": { "query": { "bool": { "must": [ { "query_string": { "fields": [ "description.hashtag" ], "query": "#TheBestAndTheBeast" } } ] } } } } }

#Results

"hits": [ { "_index": "clip", "_type": "Clip", "_id": "12", "_score": 1.8701664, "_source": { "id": 12, "description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>" } }, { "_index": "clip", "_type": "Clip", "_id": "10", "_score": 0.9263139, "_source": { "id": 10, "description": "TheBestAndTheBeast" } }, { "_index": "clip", "_type": "Clip", "_id": "11", "_score": 0.9263139, "_source": { "id": 11, "description": "bikes in DUBAI TheBestAndTheBeast profession" } } ]

分析器示例：

get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI { "tokens": [ { "token": "#dubai", "start_offset": 0, "end_offset": 6, "type": "word", "position": 0 }, { "token": "dubai", "start_offset": 1, "end_offset": 6, "type": "word", "position": 0 } ] } get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B { "tokens": [ { "token": "this", "start_offset": 0, "end_offset": 4, "type": "word", "position": 0 }, { "token": "#teamindia", "start_offset": 78, "end_offset": 98, "type": "word", "position": 1 }, { "token": "india", "start_offset": 78, "end_offset": 98, "type": "word", "position": 2 }, { "token": "team", "start_offset": 78, "end_offset": 98, "type": "word", "position": 2 }, { "token": "teamindia", "start_offset": 78, "end_offset": 98, "type": "word", "position": 2 } ] }

Elasticsearch：点击Hashtag搜索结果

1 个答案:

已编辑的示例