我是Lucene的新手。有人可以提供一些信息,作为lucene.net metaphone实施中出了什么问题。索引已创建,但是当我搜索“ Kool”一词时,尽管通过变音符号分析器对其进行了索引,但未获得任何结果。但是,如果搜索到的单词“ cool”是实际的单词,则会获得结果。
public virtual void testKoolKat(string sIndexPath,string sDatatoCheck)
{
Lucene.Net.Store.Directory luceneIndexDirectory = new MMapDirectory(new System.IO.DirectoryInfo(sIndexPath));
Analyzer analyze = new MetaphoneReplacementAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyze);
IndexWriter writer = new IndexWriter(luceneIndexDirectory, conf);
Document doc = new Document();
doc.Add(new TextField("contents", "cool cat", Field.Store.YES));
writer.AddDocument(doc);
doc = null;
writer.Commit();
IndexReader reader = DirectoryReader.Open(luceneIndexDirectory);
IndexSearcher searcher = new IndexSearcher(reader);
Query query = (new QueryParser(LuceneVersion.LUCENE_48, "contents", analyze)).Parse(sDatatoCheck);
var collector = TopScoreDocCollector.Create(500, false);
searcher.Search(query, collector);
var oHitColl = collector.GetTopDocs().ScoreDocs;
for (int i = 0; i < oHitColl.Length; i++)
{
Document oDoc = searcher.Doc(oHitColl[i].Doc);
string sDocID = oDoc.Get("contents").Trim();
}
luceneIndexDirectory.Dispose();
writer.Dispose();
reader.Dispose();
searcher = null;
}
public class MetaphoneReplacementAnalyzer : Analyzer
{
public virtual TokenStream tokenStream(string fieldName, TextReader reader)
{
return new MetaphoneReplacementFilter(new LetterTokenizer(LuceneVersion.LUCENE_48, reader));
}
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
string[] sStopWordsList = new string[] { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" };
Collection<string> stopWords = new Collection<string>(sStopWordsList);
CharArraySet stopSet = new CharArraySet(LuceneVersion.LUCENE_48, stopWords, false);
Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
TokenFilter filter = new LowerCaseFilter(LuceneVersion.LUCENE_48, tokenizer);
filter = new StopFilter(LuceneVersion.LUCENE_48, filter, stopSet);
return new TokenStreamComponents(tokenizer, filter);
}
}
public class MetaphoneReplacementFilter : TokenFilter
{
public const string METAPHONE = "metaphone";
private Metaphone metaphoner = new Metaphone();
private CharTermAttribute termAttr;
private TypeAttribute typeAttr;
public MetaphoneReplacementFilter(TokenStream input) : base(input)
{
termAttr = AddAttribute<CharTermAttribute>();// AddAttribute(typeof(CharTermAttribute));
typeAttr = AddAttribute<TypeAttribute>();// (typeof(TypeAttribute));
}
public override bool IncrementToken()
{
//if (!input.incrementToken())
//{
// return false;
//}
string encoded;
encoded = metaphoner.Encode(termAttr.ToString());
termAttr.Append(encoded);
typeAttr.Type = METAPHONE;
return true;
}
}
谢谢。