如何使用带有Highlighter和StandardAnalyzer的Lucene通过Hibernate搜索引擎获取文本片段

时间:2014-07-26 21:14:57

标签: java hibernate lucene hibernate-search

我正在学习基于Lucene的Hibernate Search,并且已经为Article类配置好了索引,该类用于把文章保存到MySQL数据库。我的目标是向用户显示他们在浏览器中搜索到的文章,以及其中匹配的文本片段。以下是我目前的实现:

@Entity
@Indexed
@Analyzer(impl = StandardAnalyzer.class)
@Table(name = "ARTICLE", catalog = "kefet3")
public class Article implements java.io.Serializable {


private static final long serialVersionUID = 1L;
// Fields

private Integer id;
private Articlelanguage articlelanguage;
private Users users;
private Articlecategory articlecategory;
@Analyzer(impl = StandardAnalyzer.class)
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artTitle;
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artContent;

上面的类还包含所有的getter和setter,以及数据库映射相关的注解(此处省略)。

下面是搜索的方法。

   @Override
    @SuppressWarnings("unchecked")
    /**
     * Runs a keyword search over the {@code artTitle} and {@code artContent} fields of
     * {@link Article} and prints highlighted fragments of every hit to standard output.
     *
     * @param word the search term entered by the user
     * @return the matching articles, as returned by the full-text query
     */
    public List<Article> search(String word) {

        analyzer = new StandardAnalyzer(Version.LUCENE_36);
        FullTextSession fullTextSession = Search.getFullTextSession(getCurrentSession());

        // get a query builder
        QueryBuilder queryBuilder = fullTextSession.getSearchFactory()
                .buildQueryBuilder().forEntity(Article.class).get();

        // build the query
        org.apache.lucene.search.Query query = queryBuilder.keyword()
                .onFields("artTitle", "artContent")
                .matching(word).createQuery();

        FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(query, Article.class);

        List<Article> searchResultList = fullTextQuery.list();

        for (Article art : searchResultList) {
            System.out.println("searchResultList###################" + art.getArtTitle());
        }

        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(20));

        int maxNumFragmentsRequired = 3;

        for (Article art : searchResultList) {
            // Each field is highlighted independently so that a failure on one
            // field does not suppress the fragments of the other.
            String result = highlightField(highlighter, "artContent",
                    art.getArtContent(), maxNumFragmentsRequired);
            String resul2 = highlightField(highlighter, "artTitle",
                    art.getArtTitle(), maxNumFragmentsRequired);

            System.out.println(result);
            System.out.println(resul2);
        }

        return searchResultList;
    }

    /**
     * Highlights a single field value with the given highlighter.
     *
     * @param highlighter  highlighter built from the executed Lucene query
     * @param fieldName    name of the indexed field the text belongs to
     * @param text         raw field value; may be {@code null}
     * @param maxFragments maximum number of fragments to join into the result
     * @return the best fragments joined by {@code " ..."}, or {@code null} when the text
     *         is {@code null} or highlighting failed
     */
    private String highlightField(Highlighter highlighter, String fieldName, String text, int maxFragments) {
        if (text == null) {
            // new StringReader(null) would throw a NullPointerException
            return null;
        }
        try {
            // Created inside the try block: depending on the Lucene version on the
            // classpath, tokenStream(..) itself may declare IOException.
            TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
            return highlighter.getBestFragments(tokenStream, text, maxFragments, " ...");
        } catch (IOException e) {
            System.out.println("((((((((((((((((((((IOException))))))))))))))))))))" + e);
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            System.out.println("((((((((((((((((((((InvalidTokenOffsetsException))))))))))))))))))))" + e);
            e.printStackTrace();
        }
        return null;
    }

我得到的结果是:

org.springframework.web.util.NestedServletException: Request processing failed; nested exception is java.lang.IllegalStateException: No match found
org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:973)
org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:852)
javax.servlet.http.HttpServlet.service(HttpServlet.java:620)
org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:837)
javax.servlet.http.HttpServlet.service(HttpServlet.java:727)
org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
com.github.dandelion.datatables.core.web.filter.DatatablesFilter.doFilter(DatatablesFilter.java:73)

root cause

java.lang.IllegalStateException: No match found
java.util.regex.Matcher.group(Matcher.java:485)
java.util.regex.Matcher.group(Matcher.java:445)
com.kefet.dao.impl.SearchDAOImpl.search(SearchDAOImpl.java:125)
com.kefet.service.impl.SearchServiceImpl.search(SearchServiceImpl.java:47)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:606)
org.springframework.aop.support.AopUtils.invokeJoinpointUsingReflection(AopUtils.java:317)
org.springframework.aop.framework.ReflectiveMethodInvocation.invokeJoinpoint(ReflectiveMethodInvocation.java:190)
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:157)
org.springframework.transaction.interceptor.TransactionInterceptor$1.proceedWithInvocation(TransactionInterceptor.java:98)
org.springframework.transaction.interceptor.TransactionAspectSupport.invokeWithinTransaction(TransactionAspectSupport.java:262)
org.springframework.transaction.interceptor.TransactionInterceptor.invoke(TransactionInterceptor.java:95)
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
org.springframework.aop.framework.JdkDynamicAopProxy.invoke(JdkDynamicAopProxy.java:207)
com.sun.proxy.$Proxy48.search(Unknown Source)
com.kefet.controller.SearchController.searchText(SearchController.java:30)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:606)
org.springframework.web.method.support.InvocableHandlerMethod.invoke(InvocableHandlerMethod.java:215)
org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:132)
org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:104)
org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandleMethod(RequestMappingHandlerAdapter.java:749)
org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:689)
org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:83)
org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:938)
org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:870)
org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:961)
org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:852)
javax.servlet.http.HttpServlet.service(HttpServlet.java:620)
org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:837)
javax.servlet.http.HttpServlet.service(HttpServlet.java:727)
org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
com.github.dandelion.datatables.core.web.filter.DatatablesFilter.doFilter(DatatablesFilter.java:73)

我找到的教程位于以下链接:

https://code.google.com/p/hibernatesearchinaction/source/browse/trunk/ch13/src/com/manning/hsia/dvdstore/TestHighlighter.java?r=86

我的pom文件包含:

<hibernate.version>4.3.5.Final</hibernate.version>
<hibernate-search-orm.version>4.5.1.Final</hibernate-search-orm.version>
<hibernate-search-analyzers.version>4.5.1.Final</hibernate-search-analyzers.version>
<hibernate-search-infinispan.version>4.5.1.Final</hibernate-search-infinispan.version>
<lucene-highlighter.version>4.9.0</lucene-highlighter.version>
<lucene-analyzers-common.version>4.9.0</lucene-analyzers-common.version>

<mysql.connector.version>5.1.30</mysql.connector.version>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-orm</artifactId>
<version>${hibernate-search-orm.version}</version>
</dependency>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-analyzers</artifactId>
<version>${hibernate-search-analyzers.version}</version>
</dependency>


<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-infinispan</artifactId>
<version>${hibernate-search-infinispan.version}</version>
</dependency>   

<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene-highlighter.version}</version>
</dependency>   

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-core</artifactId>
<version>${hibernate.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-entitymanager</artifactId>
<version>${hibernate.version}</version>
</dependency>
<dependency>

提前谢谢

1 个答案:

答案 0 :(得分:0)

我不知道如何用TokenStream来实现,但下面是我花了3个小时钻研Hibernate Search之后得到的结果。这样一个简单的任务竟然需要这么多变通手段。这个方案最终适用于HS 4.5.1和Lucene 3.6.2(IBaseEntity是"某种"带有id的实体):

/** Opening HTML tag that getHighlighter passes to SimpleHTMLFormatter as the pre-tag. */
public static final String HIGHLIGHTER_PRE = "<span class='search-found'>";
/** Closing HTML tag that getHighlighter passes to SimpleHTMLFormatter as the post-tag. */
public static final String HIGHLIGHTER_POST = "</span>";

/**
 * Looks up the Hibernate Search document builder bound to the given indexed entity class.
 *
 * @param session Hibernate session used to obtain the full-text session
 * @param clazz   indexed entity class
 * @return the document builder of the entity's index binding
 */
protected static DocumentBuilderIndexedEntity getDocumentBuilder(Session session, Class clazz) {
    SearchFactoryImplementor factory =
        (SearchFactoryImplementor) Search.getFullTextSession(session).getSearchFactory();
    return factory.getIndexBinding(clazz).getDocumentBuilder();
}

/**
 * Builds the Lucene document for the given entity using its document builder.
 *
 * @param session Hibernate session the entity belongs to
 * @param o       entity to convert; its id is passed as the document id
 * @param clazz   indexed entity class used to look up the document builder
 * @return the Lucene document produced for the entity
 */
@SuppressWarnings("unchecked")
public static Document getDocument(Session session, IBaseEntity o, Class clazz) {
    DocumentBuilderIndexedEntity builder = getDocumentBuilder(session, clazz);
    return builder.getDocument(
        o,
        o.getId(),
        new HashMap<String, String>(),
        new HibernateSessionLoadingInitializer((SessionImplementor) session),
        new ContextualExceptionBridgeHelper());
}

/**
 * Returns the Lucene analyzer of the document builder bound to the given entity class.
 *
 * @param session Hibernate session used to reach the search factory
 * @param clazz   indexed entity class
 * @return the analyzer reported by the entity's document builder
 */
public static Analyzer getAnalyzer(Session session, Class clazz) {
    DocumentBuilderIndexedEntity builder = getDocumentBuilder(session, clazz);
    return builder.getAnalyzer();
}

/**
 * Creates a highlighter that wraps matched terms in
 * {@link #HIGHLIGHTER_PRE} / {@link #HIGHLIGHTER_POST}.
 *
 * @param luceneQuery the Lucene query, i.e. the object available before it is turned into a
 *                    {@link FullTextQuery} by the {@link FullTextSession}
 * @return a highlighter scored by the query and using a default {@code SimpleFragmenter}
 */
public static Highlighter getHighlighter(Query luceneQuery) {
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(HIGHLIGHTER_PRE, HIGHLIGHTER_POST);
    QueryScorer scorer = new QueryScorer(luceneQuery);

    Highlighter result = new Highlighter(formatter, scorer);
    result.setTextFragmenter(new SimpleFragmenter());
    return result;
}

/**
 * Returns the entity's raw indexed text without running any analyzer (i.e. HTML tags
 * contained in the values are kept as-is).
 *
 * <p>All values of every field reported by {@code listIndexedFields} are collected,
 * blank values are skipped, duplicates are dropped (first occurrence wins) and the
 * remaining values are joined with a single space.
 *
 * @param session Hibernate session used to build the entity's Lucene document
 * @param o       entity to extract text from; may be {@code null}
 * @return the joined field values, or an empty string when {@code o} is {@code null}
 *         or no non-blank value exists
 */
public static String getFullTextContent(Session session, IBaseEntity o) {
    if (o == null) {
        return "";
    }

    Document document = getDocument(session, o, o.getClass());

    // LinkedHashSet: keeps insertion order while avoiding repeated identical strings.
    Set<String> strings = new LinkedHashSet<String>();

    // here is the part where we should add fields by name to text content (manually as well)
    for (String fieldName : listIndexedFields(o.getClass())) {
        // getValues(..) yields every value stored under the name (never null), whereas
        // getField(..) only exposes the first one and fails on non-Field fieldables.
        for (String value : document.getValues(fieldName)) {
            if (!StringUtils.isBlank(value)) {
                strings.add(value);
            }
        }
    }

    StringBuilder sb = new StringBuilder();
    for (String s : strings) {
        if (sb.length() > 0) {
            sb.append(" ");
        }
        sb.append(s);
    }
    return sb.toString();
}

/**
 * Produces the highlighted search-result text for one entity.
 *
 * @param session Hibernate session used to resolve the analyzer and the entity's text
 * @param o       entity whose indexed text is highlighted
 * @param query   Lucene query whose terms are highlighted
 * @return the result of {@code getHighlightedText} for the entity's full text content
 */
public static String getHighlighterText(Session session, IBaseEntity o, Query query) {
    // Arguments are evaluated left to right: analyzer, highlighter, then text content.
    return getHighlightedText(
        getAnalyzer(session, o.getClass()),
        getHighlighter(query),
        getFullTextContent(session, o));
}

/**
 * Asks the highlighter for the best fragment of the given text.
 *
 * @param analyzer        analyzer used to tokenize the text
 * @param highlighter     configured highlighter
 * @param fullTextContent text to highlight
 * @return whatever {@code Highlighter.getBestFragment} returns for the text
 * @throws RuntimeException wrapping any exception raised while highlighting
 */
public static String getHighlightedText(Analyzer analyzer, Highlighter highlighter, String fullTextContent) {
    String bestFragment;
    try {
        // No field name is supplied (null), matching the field-less QueryScorer usage.
        bestFragment = highlighter.getBestFragment(analyzer, null, fullTextContent);
    } catch (Exception cause) {
        throw new RuntimeException("Cannot highlight lucene results", cause);
    }
    return bestFragment;
}