Elasticsearch 搜索结果仅显示孤立的搜索词

时间:2020-08-28 17:14:56

标签: elasticsearch

由于某种原因,当我们执行 Elasticsearch 搜索时,返回结果中排在最前面的都是搜索词单独出现的条目,而不是包含该词的完整句子。例如,如果搜索“fantastic”一词,则会得到以下结果:

{
    "id": 1697670,
    "start": 614.68,
    "end": 619.03,
    "text": "fantastic",
    "created_at": null,
    "updated_at": null,
    "video_id": 14784,
    "index": 127,
    "highlight": "<span class=\"highlight\">fantastic<\/span>"
  },

对于包含“fantastic”的完整句子,我们得不到任何结果。

我猜问题出在我们的映射(mapping)上,或者出在实际的搜索查询上。

这是我们的设置:

/**
 * Build the index settings: shard/replica counts plus the analysis section.
 *
 * The analysis section declares one named token filter ("filter_stemmer")
 * and three analyzers ("text_analyzer", "g_analyzer", "no_stopwords").
 *
 * @return array Nested settings array; keys and values are fixed literals.
 */
public static function getSettings()
{
    // English stemmer, referenced by name from "text_analyzer" below.
    $tokenFilters = [
        'filter_stemmer' => [
            'type' => 'stemmer',
            'language' => 'english',
        ],
    ];

    $analyzers = [
        // NOTE(review): "stopwords" is set on a "custom" analyzer here;
        // confirm the search engine actually honors that key for this type.
        'text_analyzer' => [
            'type' => 'custom',
            'stopwords' => [],
            'filter' => ['lowercase', 'filter_stemmer'],
            'tokenizer' => 'standard',
        ],
        // Uses the filter name "stemmer" directly rather than "filter_stemmer".
        'g_analyzer' => [
            'type' => 'custom',
            'filter' => ['lowercase', 'stemmer'],
            'tokenizer' => 'standard',
        ],
        // "standard" analyzer with an explicitly empty stopword list.
        'no_stopwords' => [
            'type' => 'standard',
            'stopwords' => [],
        ],
    ];

    return [
        'number_of_shards' => 1,
        'number_of_replicas' => 1,
        'analysis' => [
            'filter' => $tokenFilters,
            'analyzer' => $analyzers,
        ],
    ];
}

这是我们的映射:

/**
 * Build the index mapping: "_source" enabled plus the field definitions.
 *
 * Top-level fields are mostly plain "text"; "title", "description" and
 * "subtitles.text" additionally use the "text_analyzer" analyzer.
 * "subtitles" is a nested field with its own id/start_time/end_time/text/langcode
 * properties.
 *
 * @return array Nested mapping array; keys and values are fixed literals.
 */
public static function getMappings()
{
    // Reusable field-type fragments (PHP arrays are copied by value,
    // so sharing them between fields is safe).
    $integerField = ['type' => 'integer'];
    $floatField = ['type' => 'float'];
    $dateField = ['type' => 'date'];
    $textField = ['type' => 'text'];
    $analyzedTextField = [
        'type' => 'text',
        'analyzer' => 'text_analyzer',
    ];

    // Properties of each nested subtitle entry.
    $subtitleProperties = [
        'id' => $integerField,
        'start_time' => $floatField,
        'end_time' => $floatField,
        'text' => $analyzedTextField,
        'langcode' => $textField,
    ];

    $properties = [
        'id' => $integerField,
        'title' => $analyzedTextField,
        'description' => $analyzedTextField,
        'jobStatus' => $textField,
        'youtubeId' => $textField,
        'thumbnail' => $textField,
        'playlistId' => $textField,
        'channelId' => $textField,
        'category' => $textField,
        'globifyChannelId' => $integerField,
        'publishedDate' => $dateField,
        // Presumably the date the video was created (going by the field name).
        'created_at' => $dateField,
        // Date the video was updated.
        'updated_at' => $dateField,
        'url' => $textField,
        'subtitles' => [
            'type' => 'nested',
            'properties' => $subtitleProperties,
        ],
    ];

    return [
        '_source' => [
            'enabled' => true,
        ],
        'properties' => $properties,
    ];
}

这是我们的搜索查询:

  // Highlighting for matched subtitle text: wrap hits in a <span class="highlight">.
  $subtitleHighlight = [
      'pre_tags' => ['<span class="highlight">'],
      'post_tags' => ['</span>'],
      'fields' => [
          // Empty object for per-field options (presumably so it
          // JSON-encodes as {} rather than []).
          'subtitles.text' => new \stdClass(),
      ],
  ];

  // Return up to 3 matching nested subtitle entries per hit, with highlights.
  $subtitleInnerHits = [
      'size' => 3,
      'highlight' => $subtitleHighlight,
  ];

  // Single required clause: match the search term against subtitles.text.
  $subtitleQuery = [
      'bool' => [
          'must' => [
              [
                  'match' => ['subtitles.text' => $searchTerm],
              ],
          ],
      ],
  ];

  // Full request body: nested query on "subtitles" plus pagination.
  $body = [
      'query' => [
          'nested' => [
              'inner_hits' => $subtitleInnerHits,
              'path' => 'subtitles',
              'query' => $subtitleQuery,
          ],
      ],
      'from' => $from,
      'size' => $pageSize,
  ];

任何想法为什么我们会得到这些奇怪的结果?

我是不是使用了错误的分析器(analyzer)/分词器(tokenizer)/过滤器(filter)?

任何帮助/建议或指导将不胜感激

0 个答案:

没有答案