Lucene 带条件的搜索

时间:2018-10-16 07:32:26

标签: java lucene

我正在尝试使用Lucene创建一个独立的Java应用程序,用于在XML文件中查找文本。以下是我收到的XML文件的示例:

<firstName>Tom</firstName><lastName>Jobs</lastName><role>admin</role><designation>manager</designation>

下面是索引器的代码: SimpleFileIndexer.java

/**
 * Command-line indexer that walks a data directory recursively and adds every readable,
 * non-hidden file whose name ends with a given suffix to a newly created Lucene index.
 *
 * <p>Each indexed file becomes one document with two fields: {@code contents}, fed from a
 * {@link FileReader} over the file, and {@code filename}, holding the canonical path (stored and
 * analyzed). Text is analyzed with {@link SimpleAnalyzer}; per the Lucene documentation that
 * analyzer splits text at non-letter characters and lower-cases the tokens, so markup such as
 * {@code <firstName>Tom</firstName>} is indexed as the terms {@code firstname}, {@code tom},
 * {@code firstname} rather than as one literal term.
 */
public class SimpleFileIndexer {
    /**
     * Builds the index and prints how many documents were written.
     *
     * @param args unused
     * @throws Exception if the index cannot be created or a file cannot be read
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("<path of the index directory>");
        File dataDir = new File("<path of the data directory>");
        String suffix = "txt";
        SimpleFileIndexer indexer = new SimpleFileIndexer();
        int numIndex = indexer.index(indexDir, dataDir, suffix);
        System.out.println("Numer of total files indexed:  " + numIndex);
    }

    /**
     * Creates a new index in {@code indexDir} (overwriting any existing one) and indexes the
     * matching files found under {@code dataDir}.
     *
     * @param indexDir directory that receives the index files
     * @param dataDir root directory to scan recursively
     * @param suffix file-name suffix a file must end with to be indexed
     * @return the writer's {@code maxDoc()} value read after all files were added
     * @throws Exception if opening the index, reading a file, or writing the index fails
     */
    @SuppressWarnings("deprecation")
    private int index(File indexDir, File dataDir, String suffix) throws Exception {
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), new SimpleAnalyzer(), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexWriter.setUseCompoundFile(false);
            indexDirectory(indexWriter, dataDir, suffix);
            int numIndexed = indexWriter.maxDoc();
            indexWriter.optimize();
            return numIndexed;
        } finally {
            // Always close the writer, even when indexing fails part-way, so the writer
            // (and the lock it holds on the index directory) is not left open.
            indexWriter.close();
        }
    }

    /**
     * Recursively visits {@code dataDir}, descending into sub-directories and handing every other
     * entry to {@link #indexFileWithIndexWriter}.
     *
     * @param indexWriter open writer that receives the documents
     * @param dataDir directory to scan
     * @param suffix file-name suffix filter passed through to the per-file step
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    private void indexDirectory(IndexWriter indexWriter, File dataDir, String suffix) throws IOException {
        File[] files = dataDir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read;
            // report that explicitly instead of failing with a NullPointerException below.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (f.isDirectory()) {
                indexDirectory(indexWriter, f, suffix);
            } else {
                indexFileWithIndexWriter(indexWriter, f, suffix);
            }
        }
    }

    /**
     * Adds one file to the index when it is a readable, non-hidden regular file whose name ends
     * with {@code suffix}; otherwise does nothing.
     *
     * @param indexWriter open writer that receives the document
     * @param f candidate file
     * @param suffix required file-name suffix; when {@code null} nothing is indexed
     * @throws IOException if the file cannot be opened or the document cannot be written
     */
    private void indexFileWithIndexWriter(IndexWriter indexWriter, File f, String suffix) throws IOException {
        if (f.isHidden() || f.isDirectory() || !f.canRead() || !f.exists()) {
            return;
        }

        if (suffix != null && f.getName().endsWith(suffix)) {
            System.out.println("Indexing file:... " + f.getCanonicalPath());

            Document doc = new Document();
            // NOTE(review): FileReader decodes with the platform default charset; confirm that
            // matches the encoding of the data files.
            doc.add(new Field("contents", new FileReader(f)));
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));

            indexWriter.addDocument(doc);
        }
    }
}

以下是搜索器的代码: SimpleSearcher.java

public class SimpleSearcher {
    public static final String FIELD_CONTENTS = "contents";

    public static void main(String[] args) throws Exception {
        SimpleSearcher searcher = new SimpleSearcher();
        searcher.searchAdvancedIndex();
    }

    @SuppressWarnings("deprecation")
    private void searchAdvancedIndex() throws IOException {
        File indexDir = new File("<path of the indexed file directory>");
        Directory directory = FSDirectory.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(directory);

        Query query1 = new TermQuery(new Term(FIELD_CONTENTS, "<firstName>Tom</firstName>"));
        Query query2 = new TermQuery(new Term(FIELD_CONTENTS, "<lastName>Jobs</lastName>"));

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(query1, BooleanClause.Occur.MUST);
        booleanQuery.add(query2, BooleanClause.Occur.MUST);
        displayQuery(booleanQuery);
        TopDocs topDocs = searcher.search(booleanQuery, 100);
        ScoreDoc[] hits = topDocs.scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println(d.get("filename"));
        }

        System.out.println("Found " + hits.length);
        searcher.close();
    }

    public static void displayQuery(Query query) {
        System.out.println("Query: " + query.toString());
    }
}

我试图找出文件中是否存在文本<firstName>Tom</firstName><lastName>Jobs</lastName>。当我运行SimpleFileIndexer.java文件时,索引创建成功,但是当我运行SimpleSearcher.java文件时,搜索没有返回任何匹配结果。

如能提供任何帮助,我将不胜感激。

0 个答案:

没有答案