在 Elasticsearch 中配置分析器

时间:2015-01-22 14:45:42

标签: java elasticsearch lucene

我编写了下面的程序,用来了解如何使用 Elasticsearch 进行全文搜索。搜索单个单词时一切正常,但当我想搜索多个单词的组合时,却不起作用。

package in.blogspot.randomcompiler.elastic_search_demo;

import in.blogspot.randomcompiler.elastic_search_impl.Event;

import java.util.Date;

import org.elasticsearch.action.count.CountRequestBuilder;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

import com.fasterxml.jackson.core.JsonProcessingException;

/**
 * Small end-to-end demo against an Elasticsearch node: it deletes three
 * documents, indexes three {@link Event}s under the same ids, then runs a
 * count and a search that both use the same term filter and prints the
 * results to standard output.
 */
public class ElasticSearchDemo
{
    /** Index that every delete and index request in this demo targets. */
    private static final String INDEX_NAME = "chat-data";

    /** Document type that every delete and index request in this demo targets. */
    private static final String TYPE_NAME = "event";

    /**
     * Runs the demo against the node at localhost:9301.
     *
     * @param args unused
     * @throws JsonProcessingException if an event cannot be serialized to JSON
     */
    public static void main( String[] args ) throws JsonProcessingException
    {
        Client client = new TransportClient()
        .addTransportAddress(new InetSocketTransportAddress("localhost", 9301));

        // Close the client on every exit path; previously an exception thrown
        // by any request below skipped client.close().
        try {
            // Delete the three document ids that are re-indexed below.
            for (int id = 1; id <= 3; id++) {
                client.prepareDelete(INDEX_NAME, TYPE_NAME, String.valueOf(id)).execute().actionGet();
            }

            indexEvent(client, "1", "e1", new Event("LOGIN", new Date(), "Agent1 logged into chat"));
            indexEvent(client, "2", "e2", new Event("LOGOUT", new Date(), "Agent1 logged out of chat"));
            indexEvent(client, "3", "e3", new Event("BREAK", new Date(), "Agent1 went on break in the middle of a chat"));

            // NOTE(review): a term filter does not analyze its argument, so the
            // two-word string is looked up verbatim as one term. Kept as-is
            // because this is the behavior the demo is investigating.
            FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");

            SearchRequestBuilder searchBuilder = client.prepareSearch();
            searchBuilder.setPostFilter(filterBuilder);

            CountRequestBuilder countBuilder = client.prepareCount();
            countBuilder.setQuery(QueryBuilders.constantScoreQuery(filterBuilder));

            CountResponse countResponse1 = countBuilder.execute().actionGet();
            System.out.println("HITS: " + countResponse1.getCount());


            SearchResponse searchResponse1 = searchBuilder.execute().actionGet();
            SearchHits hits = searchResponse1.getHits();
            for(int i=0; i<hits.hits().length; i++) {
                SearchHit hit = hits.getAt(i);
                System.out.println("[" + i + "] " + hit.getId() + " : " +hit.sourceAsString());
            }
        } finally {
            client.close();
        }
    }

    /**
     * Serializes one event, prints the JSON, indexes it under the given id and
     * prints the resulting index response.
     *
     * @param client      client used to send the index request
     * @param id          document id to index the event under
     * @param description label printed with the index response
     * @param event       event to serialize and index
     * @throws JsonProcessingException if the event cannot be serialized to JSON
     */
    private static void indexEvent(Client client, String id, String description, Event event)
            throws JsonProcessingException {
        String json = event.prepareJson();
        System.out.println("JSON: " + json);
        IndexResponse response = client.prepareIndex(INDEX_NAME, TYPE_NAME, id).setSource(json).execute().actionGet();
        printIndexResponse(description, response);
    }

    /**
     * Prints the index, type, id and version reported by an index response.
     *
     * @param description label identifying which event the response belongs to
     * @param response    response returned by the index request
     */
    private static void printIndexResponse(String description, IndexResponse response) {
        System.out.println("Index response for: " + description);
        System.out.println("Index name: " + response.getIndex());
        System.out.println("Index type: " + response.getType());
        System.out.println("Index id: " + response.getId());
        System.out.println("Index version: " + response.getVersion());
    }
}

我面临的问题是,当我搜索“break middle”时,它什么也没有返回,期望是它应该返回第3个事件。

我知道我需要配置一个不同的分析器,而不是默认的分析器,以使其适当地进行索引。

有人可以帮助我理解如何做到这一点吗?如果能提供一些完整的示例就更好了。

1 个答案:

答案 0 :(得分:2)

问题是由于您使用的是Term过滤器而造成的:

FilterBuilder filterBuilder = FilterBuilders.termFilter("value", "break middle");

Term 过滤器不会分析查询字符串中的数据——因此 Elasticsearch 查找的是完整的字符串 "break middle"。

然而,第三个文档很可能已被 ES 拆分为如下的单个词项:

Agent1 
went 
on 
break 
in 
the 
middle 
of 
a 
chat

要解决此问题,请使用过滤器或查询来分析您传递的字符串 - 例如,使用Query_String查询或Match查询。

例如:

QueryBuilder qb = QueryBuilders.matchQuery("event", "break middle");

或:

QueryBuilder qb = QueryBuilders.queryString("break middle");

有关详细信息,请参阅Java API documentation for Elasticsearch