Lucene.net Metaphone实现错误

时间:2018-11-15 11:56:43

标签: c# lucene.net

我是Lucene的新手。有人可以提供一些信息,作为lucene.net metaphone实施中出了什么问题。索引已创建,但是当我搜索“ Kool”一词时,尽管通过变音符号分析器对其进行了索引,但未获得任何结果。但是,如果搜索到的单词“ cool”是实际的单词,则会获得结果。

public virtual void testKoolKat(string sIndexPath,string sDatatoCheck)
{

    Lucene.Net.Store.Directory luceneIndexDirectory = new MMapDirectory(new System.IO.DirectoryInfo(sIndexPath));           

    Analyzer analyze = new MetaphoneReplacementAnalyzer();   

    IndexWriterConfig conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyze);
    IndexWriter writer = new IndexWriter(luceneIndexDirectory, conf);

    Document doc = new Document();            
    doc.Add(new TextField("contents", "cool cat", Field.Store.YES));
    writer.AddDocument(doc);
    doc = null;
    writer.Commit();

    IndexReader reader = DirectoryReader.Open(luceneIndexDirectory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = (new QueryParser(LuceneVersion.LUCENE_48, "contents", analyze)).Parse(sDatatoCheck);             

    var collector = TopScoreDocCollector.Create(500, false);

    searcher.Search(query, collector);
    var oHitColl = collector.GetTopDocs().ScoreDocs;
    for (int i = 0; i < oHitColl.Length; i++)
    {                
        Document oDoc = searcher.Doc(oHitColl[i].Doc);
        string sDocID = oDoc.Get("contents").Trim();
    }

    luceneIndexDirectory.Dispose();
    writer.Dispose();
    reader.Dispose();
    searcher = null;

}


public class MetaphoneReplacementAnalyzer : Analyzer
{
     public virtual TokenStream tokenStream(string fieldName, TextReader reader)
     {
           return new MetaphoneReplacementFilter(new LetterTokenizer(LuceneVersion.LUCENE_48, reader));

     }
      protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
     {

    string[] sStopWordsList = new string[] { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" };
    Collection<string> stopWords = new Collection<string>(sStopWordsList);             
    CharArraySet stopSet = new CharArraySet(LuceneVersion.LUCENE_48, stopWords, false);


    Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
    TokenFilter filter = new LowerCaseFilter(LuceneVersion.LUCENE_48, tokenizer);
    filter = new StopFilter(LuceneVersion.LUCENE_48, filter, stopSet);

    return new TokenStreamComponents(tokenizer, filter);
    }
}

public class MetaphoneReplacementFilter : TokenFilter
{
      public const string METAPHONE = "metaphone";
      private Metaphone metaphoner = new Metaphone();
      private CharTermAttribute termAttr;
      private TypeAttribute typeAttr;
      public MetaphoneReplacementFilter(TokenStream input) : base(input)
      {
            termAttr = AddAttribute<CharTermAttribute>();//                AddAttribute(typeof(CharTermAttribute));
            typeAttr = AddAttribute<TypeAttribute>();// (typeof(TypeAttribute));
      }

      public override bool IncrementToken()
      {
            //if (!input.incrementToken())
           //{
            //    return false;
            //}
             string encoded;
             encoded = metaphoner.Encode(termAttr.ToString());
             termAttr.Append(encoded);
             typeAttr.Type = METAPHONE;
             return true;
       }
}

谢谢。

0 个答案:

没有答案