Hello, I have written a class to test Lucene index creation and I am getting the error below.
Lucene version 6.5.1.
Exception in thread "main" java.lang.IllegalStateException: TokenStream contract violation: reset()/close() call missing, reset() called multiple times, or subclass does not call super.reset(). Please see Javadocs of TokenStream class for more information about the correct consuming workflow.
at org.apache.lucene.analysis.Tokenizer$1.read(Tokenizer.java:109)
at java.io.Reader.read(Reader.java:140)
at org.apache.lucene.analysis.pattern.PatternTokenizer.fillBuffer(PatternTokenizer.java:162)
at org.apache.lucene.analysis.pattern.PatternTokenizer.reset(PatternTokenizer.java:151)
at org.apache.lucene.analysis.TokenFilter.reset(TokenFilter.java:70)
at org.apache.lucene.analysis.TokenFilter.reset(TokenFilter.java:70)
at org.apache.lucene.analysis.TokenFilter.reset(TokenFilter.java:70)
at org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter.reset(RemoveDuplicatesTokenFilter.java:79)
at org.apache.lucene.analysis.TokenFilter.reset(TokenFilter.java:70)
at org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.reset(EdgeNGramTokenFilter.java:110)
at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:742)
at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:447)
at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:403)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocuments(DocumentsWriterPerThread.java:273)
at org.apache.lucene.index.DocumentsWriter.updateDocuments(DocumentsWriter.java:433)
at org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1382)
at org.apache.lucene.index.IndexWriter.addDocuments(IndexWriter.java:1358)
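For reference, my understanding of the consuming workflow the message points to (from the TokenStream Javadoc) is: call reset() exactly once, then incrementToken() until it returns false, then end() and close(). A minimal sketch of that workflow against the analyzer defined below (the field name "customeName" and the sample text are just placeholders; CharTermAttribute is from org.apache.lucene.analysis.tokenattributes):

try (TokenStream ts = analyzer.tokenStream("customeName", "Test 1")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                      // must be called once before the first incrementToken()
    while (ts.incrementToken()) {
        System.out.println(term.toString());
    }
    ts.end();                        // consume end-of-stream state
}                                    // try-with-resources calls close()

So I assume the violation means reset() is being called on a stream that is in the wrong state, but I do not see where that happens in my code.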
Please see the standalone program below, and in particular the createComponents part of the analyzer; I could not find a correct example of it for version 6.5.1.
The following code uses Lucene 6.5.1 to create an index over id and name.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class LuceneWriteIndexExample
{
    private static final String INDEX_DIR = "c:/temp/lucene6index";

    // Custom analyzer: pattern tokenizer followed by lower-casing, trimming,
    // duplicate removal and edge n-grams (1..25).
    private static Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            PatternTokenizer tokenizer = new PatternTokenizer(Pattern.compile("[/\\-\\t ,;\\.]+"), -1);
            TokenStream filter = new LowerCaseFilter(tokenizer);
            filter = new TrimFilter(filter);
            //filter = new ICUFoldingFilter(filter);
            filter = new RemoveDuplicatesTokenFilter(filter);
            filter = new EdgeNGramTokenFilter(filter, 1, 25);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

    public static void main(String[] args) throws Exception
    {
        IndexWriter writer = createWriter();
        List<Document> documents = new ArrayList<>();

        Document document1 = createDocument(1, "Test 1");
        documents.add(document1);
        Document document2 = createDocument(2, "Test 2");
        documents.add(document2);

        // Let's clean everything first
        writer.deleteAll();

        writer.addDocuments(documents);
        // writer.flush();
        writer.commit();
        writer.close();
    }

    private static Document createDocument(Integer id, String name) throws IOException {
        Document document = new Document();
        document.add(new StringField("id", id.toString(), Field.Store.YES));
        document.add(new TextField("name", name, Field.Store.YES));
        // Pre-built TokenStream for the "customeName" field; the stack trace above is
        // thrown while the IndexWriter inverts this field.
        TokenStream tokenStream = analyzer.tokenStream("customeName", name);
        document.add(new TextField("customeName", tokenStream));
        return document;
    }

    private static IndexWriter createWriter() throws IOException
    {
        FSDirectory dir = FSDirectory.open(Paths.get(INDEX_DIR));
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }
}
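In case it matters, an alternative wiring I was considering (just a sketch on my side, not taken from the Lucene docs) is to register the custom analyzer per field with PerFieldAnalyzerWrapper and give the writer plain strings instead of a pre-built TokenStream:

private static IndexWriter createWriter() throws IOException
{
    FSDirectory dir = FSDirectory.open(Paths.get(INDEX_DIR));
    // Use the custom analyzer only for "customeName"; StandardAnalyzer for everything else.
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("customeName", analyzer);
    IndexWriterConfig config = new IndexWriterConfig(
            new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField));
    return new IndexWriter(dir, config);
}

// and in createDocument(), a plain string instead of analyzer.tokenStream(...):
document.add(new TextField("customeName", name, Field.Store.YES));

(PerFieldAnalyzerWrapper is org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; Map/HashMap are from java.util.) Is that the recommended approach for 6.5.1, or is there a correct way to index a pre-created TokenStream?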