我首次使用 Lucene.Net dll 进行全文搜索。 我的情况(示例): - 我有多个不同实体(entities)的 List,它们被索引的字段各不相同。例如导演 Director {id,type,title,firstname,...} 和汽车 Car {id,type,name,color,owner ...}。 当我使用多字段(MultiField)搜索 "Rob" 时,一切正常。结果中同时包含导演和汽车的文档。
问题: 我能否知道结果是在哪个字段中找到的?标题(title)、所有者(owner)……? 我希望将解析后的结果作为 MyParsedResult 列表 {id,type,content} 返回,其中 content 字段将包含文档(doc)中相关字段的值(例如 doc.Get("firstname"))
有可能吗?
非常感谢你。
答案 0 :(得分:1)
我根据您的需求修改了这个答案(this answer):
/// <summary>
/// Term vector mapper that records which of a query's terms occur in a document's
/// stored term vectors. After the mapper has been passed to
/// IndexReader.GetTermFreqVector(docId, mapper), <see cref="FoundTerms"/> holds the
/// matching query terms (field + text) for that document.
/// </summary>
class TVM : Lucene.Net.Index.TermVectorMapper
{
    /// <summary>Query terms found in the most recently mapped document.</summary>
    public HashSet<Lucene.Net.Index.Term> FoundTerms = new HashSet<Lucene.Net.Index.Term>();

    // Every concrete term of the rewritten query, collected once in the constructor.
    readonly HashSet<Lucene.Net.Index.Term> _AllTerms = new HashSet<Lucene.Net.Index.Term>();

    // Field whose term vector is currently being delivered through Map().
    string _currentField;

    /// <summary>
    /// Rewrites <paramref name="q"/> against <paramref name="r"/> and extracts its terms.
    /// </summary>
    /// <param name="q">Query whose terms should be looked for.</param>
    /// <param name="r">Reader used to rewrite the query before term extraction.</param>
    public TVM(Lucene.Net.Search.Query q, Lucene.Net.Index.IndexReader r) : base()
    {
        q.Rewrite(r).ExtractTerms(_AllTerms);
    }

    /// <summary>
    /// Remembers the field name announced here so that Map() can match terms
    /// against the right field.
    /// </summary>
    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        _currentField = field;
    }

    /// <summary>Resets the result set when a new document is announced.</summary>
    public override void SetDocumentNumber(int documentNumber)
    {
        FoundTerms.Clear();
        _currentField = null;
    }

    /// <summary>
    /// Records the term if the query contains it for the current field.
    /// </summary>
    public override void Map(string term, int frequency, Lucene.Net.Index.TermVectorOffsetInfo[] offsets, int[] positions)
    {
        // Match on field AND text: matching on text alone would report e.g. "field2:foo"
        // for a document that only contains "foo" in field1.
        var foundTerm = _AllTerms.FirstOrDefault(x => x.Field == _currentField && x.Text == term);
        if (foundTerm != null)
        {
            FoundTerms.Add(foundTerm);
        }
    }
}
/// <summary>
/// Demonstration: indexes three small documents (with term vectors) into an in-memory
/// directory, runs a prefix query over field1 and field2, and prints for each hit the
/// field:term pairs of the query that the TVM mapper found in that document.
/// </summary>
void TermVectorMapperTest()
{
    // The directory is disposable; release it once indexing and searching are done.
    using (var dir = new Lucene.Net.Store.RAMDirectory())
    {
        //Index
        using (var writer = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
        {
            // Term vectors must be stored, otherwise GetTermFreqVector has nothing to map.
            Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
            d.Add(new Lucene.Net.Documents.Field("field1", "microscope aaa", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            d.Add(new Lucene.Net.Documents.Field("field2", "microswave bbb", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(d);

            d = new Lucene.Net.Documents.Document();
            d.Add(new Lucene.Net.Documents.Field("field2", "microsoft ccc", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(d);

            d = new Lucene.Net.Documents.Document();
            d.Add(new Lucene.Net.Documents.Field("field1", "zzz", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(d);
        }

        //Search
        using (var indexReader = Lucene.Net.Index.IndexReader.Open(dir, true))
        using (var indexSearcher = new Lucene.Net.Search.IndexSearcher(indexReader))
        {
            var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
            // NOTE(review): boolean rewrite is presumably required so that the prefix
            // queries expand into concrete terms that TVM can extract - confirm.
            queryParser.MultiTermRewriteMethod = Lucene.Net.Search.MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            var query = queryParser.Parse("field1:micro* field2:micro*");
            var results = indexSearcher.Search(query, 5);

            TVM tvm = new TVM(query, indexReader);
            foreach (var sd in results.ScoreDocs)
            {
                Console.Write("DOCID:" + sd.Doc + " > ");
                // NOTE(review): the mapper's own reset in SetDocumentNumber may not run for a
                // document without stored term vectors; clear here so a previous hit's
                // terms are never printed for the next one - confirm against the reader.
                tvm.FoundTerms.Clear();
                indexReader.GetTermFreqVector(sd.Doc, tvm);
                Console.WriteLine(String.Join(" ", tvm.FoundTerms.Select(term => "[" + term.Field + ":" + term.Text + "]")));
            }
        }
    }
}