lucene的问题

时间:2013-08-18 20:29:48

标签: java search lucene

package avajava;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;

public class HelloLucene {
   public static void main(String[] args) throws IOException, ParseException {
      // 0. Specify the analyzer for tokenizing text.
      //    The same analyzer should be used for indexing and searching
      StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

      // 1. create the index
      Directory index = new RAMDirectory();

      IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);

      IndexWriter w = new IndexWriter(index, config);
      addDoc(w, "Lucene in Action", "193398817");
      addDoc(w, "Lucene for office use", "55320055Z");
      addDoc(w, "Managing Gigabytes", "55063554A");
      addDoc(w, "The Art of Computer Science", "9900333X");
      w.close();

      // 2. query
      String querystr = "title:of";

      // the "title" arg specifies the default field to use
      // when no field is explicitly specified in the query.
      Query q = new QueryParser(Version.LUCENE_44, "title", analyzer).parse(querystr);

      // 3. search
      int hitsPerPage = 10;
      IndexReader reader = DirectoryReader.open(index);
      IndexSearcher searcher = new IndexSearcher(reader);
      TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
      searcher.search(q, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;

      // 4. display results
      System.out.println("Found " + hits.length + " hits.");
      for(int i=0;i<hits.length;++i) {
         int docId = hits[i].doc;
         Document d = searcher.doc(docId);
         System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
      }

      // reader can only be closed when there
      // is no need to access the documents any more.
      reader.close();
  }

  private static void addDoc(IndexWriter w, String title, String isbn) throws IOException {
     Document doc = new Document();
     doc.add(new TextField("title", title, Field.Store.YES));

     // use a string field for isbn because we don't want it tokenized
     doc.add(new StringField("isbn", isbn, Field.Store.YES));
     w.addDocument(doc);
  }

}

我从一些例子中得到了上面的代码。但是我没有根据查询字符串(“title:of”)得到预期的结果。

预期输出:

发现2次点击。  1. 55320055Z Lucene供办公室使用  2. 9900333X计算机科学的艺术

请帮忙。

1 个答案:

答案 0 :(得分:3)

您需要使用一组空的停用词构造StandardAnalyzer。 'of'是一个步骤。

Lucene StandardAnalyzer