我有一个名为 thatfeelwhen.pdf 的文件。当我用 “that” 或 “feel” 这样的词搜索时没有任何命中,而输入 “when” 或完整的文件名时却可以命中。我使用的是标准分析器(StandardAnalyzer)。我怎样才能让 Lucene 的搜索器匹配所有内容?我的搜索查询似乎能匹配文件中的内容,但无法匹配文件名。
// Page class that hosts the PDF search handler and the Lucene index helpers below.
public partial class _Default : Page
{
// Lucene directory holding the index; assigned when the index is rebuilt or opened for a search.
Directory finalDirectory = null;
// Analyzer passed both to index building and to the query parser, so both sides tokenize text the same way.
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
其他方法中的代码:
/// <summary>
/// Adds one PDF to the index as a single Lucene document.
/// The "fileName" field is analyzed and stored (so it can be read back from search hits);
/// the "pdfBody" field is analyzed but not stored.
/// </summary>
/// <param name="filename">Value written to the "fileName" field.</param>
/// <param name="pdfBody">Extracted PDF text written to the "pdfBody" field.</param>
/// <param name="writer">Open index writer that receives the document.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
private static void AddTextToIndex(string filename, string pdfBody, IndexWriter writer)
{
    // Fail with a descriptive exception instead of an anonymous NullReferenceException.
    if (filename == null)
    {
        throw new ArgumentNullException("filename");
    }
    if (pdfBody == null)
    {
        throw new ArgumentNullException("pdfBody");
    }
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    var doc = new Document();
    doc.Add(new Field("fileName", filename, Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("pdfBody", pdfBody, Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
}
/// <summary>
/// Rebuilds the Lucene index from scratch: extracts the text of every file in the
/// forms folder and indexes it together with the file's name.
/// </summary>
/// <param name="analyzer">Analyzer used by the index writer to tokenize the indexed fields.</param>
/// <returns>The Lucene directory that holds the newly written index.</returns>
private static Directory buildIndex(Analyzer analyzer)
{
    string[] syllabusFiles = System.IO.Directory.GetFiles(@"C:\mywebsite\files\forms");
    Directory directory = FSDirectory.Open(new DirectoryInfo(@"C:\mywebsite\files\LuceneIndex"));

    // The third constructor argument (create = true) replaces any existing index.
    // The using block guarantees the writer is disposed even when text extraction
    // or indexing throws part-way through.
    using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
    {
        foreach (string filePath in syllabusFiles)
        {
            string pdfTextExtracted = pdfText(filePath);

            // Index only the file name. The previous prefix-stripping Replace used a
            // folder string that did not match the folder being listed, so the full
            // path ended up in the "fileName" field.
            string fileNameOnly = System.IO.Path.GetFileName(filePath);

            AddTextToIndex(fileNameOnly, pdfTextExtracted, writer);
        }

        writer.Optimize();
    }

    return directory;
}
/// <summary>
/// Click handler for the PDF search control. An empty search box shows the
/// "no search string" label, the literal text "build_index" rebuilds the index,
/// and any other text is run as a query against the "fileName" and "pdfBody" fields.
/// </summary>
protected void txtBoxSearchPDF_Click(object sender, EventArgs e)
{
if (txtBoxSearchString.Text == "")
{
lblNoSearchString.Visible = true;
}
// Typing the exact text "build_index" triggers a full index rebuild instead of a search.
else if (txtBoxSearchString.Text == "build_index")
{
this.finalDirectory = buildIndex(this.analyzer);
}
else
{
//searching PDF text
lblNoSearchString.Visible = false;
StringBuilder sb = new StringBuilder();
this.finalDirectory = FSDirectory.Open(new DirectoryInfo(@"C:\mywebsite\files\LuceneIndex"));
// NOTE(review): the reader and searcher are not disposed anywhere in the visible part of this handler - confirm they are released further down.
IndexReader indexReader = IndexReader.Open(this.finalDirectory, true);
Searcher indexSearch = new IndexSearcher(indexReader);
string searchQuery = txtBoxSearchString.Text;
// Parse the query against both fields, using the same analyzer instance that is passed to buildIndex.
var fields = new[] { "fileName", "pdfBody" };
var queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fields, this.analyzer);
Query query;
try
{
query = queryParser.Parse(searchQuery.Trim());
}
catch (ParseException)
{
// The raw text was not valid query syntax; retry with the query-syntax characters escaped.
query = queryParser.Parse(QueryParser.Escape(searchQuery.Trim()));
}
// Ask for up to MaxDoc results, i.e. no practical cap on the number of hits returned.
TopDocs resultDocs = indexSearch.Search(query, indexReader.MaxDoc);
var hits = resultDocs.ScoreDocs;
// Emit one HTML link per hit, built from the stored "fileName" value.
foreach (var hit in hits)
{
var documentFromSearcher = indexSearch.Doc(hit.Doc);
string getResult = documentFromSearcher.Get("fileName");
// Only spaces are percent-encoded for the URL.
string formattedResult = getResult.Replace(" ", "%20");
// NOTE(review): the file name is concatenated into the markup without HTML encoding and the href is unquoted - confirm file names are trusted.
sb.AppendLine(@"<a href=https://website.com/search/forms/" + formattedResult+ ">" + getResult+"</a>");
sb.AppendLine("<br>");
}
答案 0 :(得分:1)
我最终改用 Analyzer analyzer = new SingleCharTokenAnalyzer();
并获得了更好的结果。
我尝试过 Simple、Standard、Whitespace 和 Keyword 分析器,但如果不额外花功夫去定制,它们都不能真正满足我的需求。