文档上的Lucene索引不能通过ClassBridge钩子工作

时间:2014-12-16 16:15:48

标签: java hibernate lucene full-text-search hibernate-search

问题:当我上传Word文档时,我可以按标题(上传文档时输入)和摘要进行搜索,但是当我使用文档正文中的某些文本进行搜索时,却得不到任何结果。

我正在使用" hibernate-search-engine:4.3.0.Final,lucene-core:3.6.2"

pom.xml

   <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-search-orm</artifactId>
            <version>${hibernate.search.version}</version>
        </dependency>
        <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-search-engine</artifactId>
            <version>${hibernate.search.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>3.6.2</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>3.6.2</version>
            <scope>runtime</scope>
        </dependency>

类

@Entity

@SequenceGenerator(name = "sitepagecontent_seq", sequenceName = "SITEPAGECONTENT_SEQ")
@Indexed(index = "SitePageContent")
@FullTextFilterDef(name = "condition1", impl = StatusFilterFactory.class) //Filter factory with parameters
@Analyzer (impl = StandardAnalyzer.class)
@ClassBridge(name = "splitcontentfileupload",
        index = Index.YES,
        store = Store.YES,
        impl = WordDocHandlerBridge.class,
        params = @org.hibernate.search.annotations.Parameter(name = "padding", value = " ")
        )
@Table(name = "SITE_PAGE_CONTENT")
public class SitePageContent extends BaseObject implements Comparable<SitePageContent> {

    // Fields
    private static final long serialVersionUID = -7424477214552600300L;

    private Long id;

    @IndexedEmbedded
    private Content content;

    @IndexedEmbedded
    private SitePage sitePage;

    private Long sequence;

Content.java

@Entity
@SequenceGenerator(name = "content_seq", sequenceName = "CONTENT_SEQ")

@Table(name = "CONTENT")
@VersionSupportModel
public class Content extends BaseObject implements Comparable<Content> {
// ------------------------------ FIELDS ------------------------------

    // Fields

    private static final long serialVersionUID = 1441591301055742001L;


    private Long id;

    @IndexedEmbedded
    private UploadedFile uploadedFile;

    @IndexedEmbedded
    private ContentType contentType;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String title; 

    private String prevTitle;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String teaser;    

    private String prevTeaser;

    private String linkedContentType;

    private String linkedUrl;

    private Boolean linkedContentPopup;

    @Field(index = Index.YES, store = Store.YES)
    private String status;

    private EcommUser createdBy;

    private EcommUser modifiedBy;

    @Temporal(TemporalType.DATE)
    private Date createdDate;

    @Temporal(TemporalType.DATE)
    private Date modifiedDate;

    private String shared;

    private String statusTemp;

    private Set<Request> requests = new HashSet<Request>(0);

    @ContainedIn
    private Set<SitePageContent> sitePageContents = new HashSet<SitePageContent>(0);

    private Integer version;

    private Integer articleId;

    private String summary;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String description;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String contactName;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String contactPhone;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String contactEmail;

    @Field(index = Index.YES, store = Store.YES, analyzer = @Analyzer(impl = StandardAnalyzer.class))
    private String contactPostalAddress;

WordDocHandlerBridge.java

public class WordDocHandlerBridge implements FieldBridge, ParameterizedBridge { 

    protected final Log log = LogFactory.getLog(getClass());

    public static String paddingProperty = "padding";

    private String padding = "";


    /**
     * Receives the bridge parameters and captures the optional padding string.
     *
     * @param arg0 parameter map; when it holds a non-null entry under
     *             {@code paddingProperty}, that entry replaces the current padding
     */
    public void setParameterValues(Map arg0) {
        final Object configured = arg0.get( paddingProperty );
        if (configured == null) {
            // Nothing supplied under the padding key: keep the current padding.
            return;
        }
        this.padding = (String) configured;
    }


    /**
     * Adds a Lucene field holding the text extracted from the file attached to a
     * {@code SitePageContent}.
     *
     * <p>The entity is navigated as {@code SitePageContent -> Content -> UploadedFile}.
     * Any link of that chain may be absent; in that case an empty string is indexed
     * instead of failing the whole indexing operation with a
     * {@code NullPointerException}.
     *
     * @param name          name of the Lucene field to add
     * @param value         the object being indexed; cast to {@code SitePageContent}
     * @param document      the Lucene document that receives the field
     * @param luceneOptions supplies the store, index, term-vector and boost settings
     */
    public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
        String fieldValue = "";

        // Walk the association chain defensively: a missing Content or UploadedFile
        // simply means there is no file text to index.
        SitePageContent sitCont = (SitePageContent) value;
        Content cont = (sitCont != null) ? sitCont.getContent() : null;
        UploadedFile upF = (cont != null) ? cont.getUploadedFile() : null;

        if (upF != null) {
            String contentType = upF.getFileContentType();
            if (contentType == null) {
                contentType = "";
            }
            byte[] fileBytes = upF.getFileContent();
            if (fileBytes == null) {
                fileBytes = new byte[0];
            }
            String extracted = convertFile2String(contentType, fileBytes);
            // NOTE(review): convertFile2String is not visible here; guard against a null
            // result because the Lucene Field constructor rejects a null value.
            if (extracted != null) {
                fieldValue = extracted;
            }
        }

        Field field = new Field( name, fieldValue, luceneOptions.getStore(), luceneOptions.getIndex(), luceneOptions.getTermVector() );
        field.setBoost( luceneOptions.getBoost() );
        document.add( field );
    }



    private String convertFile2String(String type, byte[] content) {
  }

当我调试"WordDocHandlerBridge.java"时,可以看到文件内容已经被设置到Field中并添加到了Document里,但是搜索时却查不到它。

注意:我有一个独立(standalone)程序用来重建索引;只要用它重建索引,我就能搜索到结果。

有人能帮我解决这个问题吗?

1 个答案:

答案 0 :(得分:0)

我解决了这个问题。WordDocHandlerBridge中的"convertFile2String"方法会实际打开并读取文件,然后生成一个String作为Field的值。问题在于代码没有关闭"文件",不知为何这导致Lucene没有更新索引;当我把PDF和POI文件都关闭之后,一切就都正常工作了。

需要指出的是,这段代码在Lucene 2.4下是可以正常工作的;这可能是Lucene 3.6中新增的行为,而且是一个很好的特性。