Question

我在umbraco中设置Lucene搜索引擎时遇到了问题。我试图搜索存储在Umbraco创建的默认索引中的数据。搜索方法如下：

        /// <summary>
        /// Walks the grid JSON stored in the "bodyContent" field of <paramref name="doc1"/>
        /// (sections -> rows -> areas -> controls) and builds the result rows for one search hit.
        /// Controls whose value is a JSON object are treated as macros: every entry of their
        /// "dokument" parameter whose file name contains <paramref name="criteria"/> becomes a
        /// <c>Type = 0</c> row. All other controls are treated as HTML: their tag-stripped text
        /// is highlighted and added as a <c>Type = 1</c> row.
        /// </summary>
        /// <param name="highlighter">Highlighter used to extract the best fragments of plain-text controls.</param>
        /// <param name="analyzer">Analyzer used to tokenize the text of each plain-text control.</param>
        /// <param name="doc1">Lucene document of the hit; its "bodyContent" field holds the grid JSON.</param>
        /// <param name="criteria">Search term matched (case-insensitively) against macro document file names.</param>
        /// <returns>The collected rows; empty when the document has no body content.</returns>
        private DictionaryResult GetRowContent(
        Lucene.Net.Highlight.Highlighter highlighter,
        Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer
        ,Lucene.Net.Documents.Document doc1, string criteria)
    {
        DictionaryResult controls = new DictionaryResult();

        string bodyContent = doc1.Get("bodyContent");
        if (string.IsNullOrEmpty(bodyContent))
        {
            // Nothing indexed for this document: there is nothing to parse or highlight.
            return controls;
        }

        JavaScriptSerializer jsScriptSerializer = new JavaScriptSerializer();
        dynamic rowContentHtmlDocument = JObject.Parse(bodyContent);
        foreach (dynamic section in rowContentHtmlDocument.sections)
        {
            foreach (var row in section.rows)
            {
                foreach (var area in row.areas)
                {
                    foreach (var control in area.controls)
                    {
                        bool hasEditor = control != null && control.editor != null;
                        if (!hasEditor)
                        {
                            continue;
                        }

                        var valueToken = ((JContainer)control)["value"];
                        if (valueToken == null)
                        {
                            // Control without a value: neither a macro nor text.
                            continue;
                        }
                        string rawValue = valueToken.ToString();

                        // A value that parses as a JSON object is a macro; anything else is HTML/text.
                        JObject rowContentHtml = null;
                        try
                        {
                            rowContentHtml = JObject.Parse(rawValue);
                        }
                        catch (Newtonsoft.Json.JsonException)
                        {
                            // Expected for plain HTML values; handled by the text branch below.
                        }

                        if (rowContentHtml != null)
                        {
                            try
                            {
                                AddMatchingMacroDocuments(controls, rowContentHtml, criteria, jsScriptSerializer);
                            }
                            catch (Exception ex)
                            {
                                // Best effort: one malformed macro must not break the whole result,
                                // but leave a trace instead of swallowing the failure silently.
                                System.Diagnostics.Trace.TraceWarning("Skipping macro control while building search results: " + ex.Message);
                            }
                            continue;
                        }

                        // Highlight only this control's own tag-stripped text. Highlighting the raw
                        // "bodyContent" field would pull the macros' JSON into the fragments.
                        var text = HtmlRemoval.StripTagsRegex(rawValue).Replace("ë", "e").Replace("ç", "c");
                        if (string.IsNullOrWhiteSpace(text))
                        {
                            continue;
                        }

                        // A TokenStream can only be consumed once, so build a fresh one per control.
                        var stream = analyzer.TokenStream("", new StringReader(text));
                        var textResultFiltered = highlighter.GetBestFragments(stream, text, 5, "...");
                        if (!string.IsNullOrEmpty(textResultFiltered))
                        {
                            controls.Add(new RowResult()
                            {
                                Type = 1,
                                Object = textResultFiltered
                            });
                        }
                    }
                }
            }
        }
        return controls;
    }

    // Adds a Type = 0 row for every document listed in the macro's "dokument" parameter whose
    // file name contains the criteria (case-insensitive). Entries lacking a file name or a
    // valid DocumentId GUID are skipped individually instead of aborting the whole macro.
    private static void AddMatchingMacroDocuments(DictionaryResult controls, JObject macro, string criteria, JavaScriptSerializer serializer)
    {
        var macroParamsToken = macro["macroParamsDictionary"];
        if (macroParamsToken == null || criteria == null)
        {
            return;
        }

        var macroParamsDictionary = JObject.Parse(macroParamsToken.ToString());
        var documentText = macroParamsDictionary.GetValue("dokument");
        if (documentText == null)
        {
            return;
        }

        // The document list is stored HTML-encoded inside the macro parameter.
        var documentsJson = documentText.ToString().Replace("&quot;", "\"");
        dynamic documents = serializer.Deserialize<dynamic>(documentsJson);
        if (documents == null)
        {
            return;
        }

        foreach (Dictionary<string, object> doc in documents)
        {
            object fileNameValue;
            object documentIdValue;
            if (!doc.TryGetValue("FileName", out fileNameValue) || fileNameValue == null
                || !doc.TryGetValue("DocumentId", out documentIdValue) || documentIdValue == null)
            {
                continue;
            }

            string fileName = fileNameValue.ToString();
            if (fileName.Length == 0 || fileName.IndexOf(criteria, StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            Guid documentId;
            if (!Guid.TryParse(documentIdValue.ToString(), out documentId))
            {
                continue;
            }

            controls.Add(new RowResult()
            {
                Type = 0,
                Object = new Document()
                {
                    DocumentName = fileName,
                    DocId = documentId
                }
            });
        }
    }

这里我试图从简单的html内容中过滤宏文档并以不同方式呈现。但最后到这一部分

// Excerpt of the plain-text branch of GetRowContent.
// `text` holds this control's value after HtmlRemoval.StripTagsRegex and the two character
// replacements, but it is not used by any of the following statements in this excerpt.
var text = HtmlRemoval.StripTagsRegex(((JContainer)control)["value"].ToString()).Replace("ë", "e").Replace("ç", "c");
                            // The highlighter is given the document's "bodyContent" field,
                            // not the stripped `text` computed above.
                            var textResultFiltered =  highlighter.GetBestFragments(stream,doc1.Get("bodyContent"), 5, "...");
                            // The highlighted fragments are added as a Type = 1 row.
                            controls.Add(new RowResult()
                            {
                                Type = 1,
                                Object = textResultFiltered
                            });

它包含了搜索中的宏。结果我获得了文档属性，但高亮显示的html内容具有如下的宏内容：

6th Edition V413HAV.pdf","FileContent"... Framework 6th Edition V413HAV.pdf","... with Java 8 - 1st Edition (2015) - Copy.pdf"... 4.5 Framework 6th Edition V413HAV.pdf","... And The NET 4.5 Framework 6th Edition V413HAV.pdf"

这些内容来自宏的Json数据。有没有办法将宏排除在搜索之外，或者自定义html内容，使其不在特定的宏上搜索？提前致谢。

我正在参考此链接创建Highlighter等... Link to Lucene example

知道如何阻止在宏上搜索或从突出显示的内容中排除它们吗？

Answer 1

如果您只是进行常规搜索，那看起来太复杂了。您知道Umbraco有自己的Lucene“版本”，叫做Examine吗？它内置于Umbraco，只需很少的设置（甚至无需设置）就可以运行标准搜索：https://our.umbraco.org/documentation/reference/searching/examine/

我从未在使用Examine的搜索结果中看到过宏或JSON标记，所以也许可以尝试一下？

Answer 2

您可以轻松使用Examine。您只需选择所需的搜索提供程序（config/ExamineSettings.config），这样就可以选择是否排除未发布和受保护的内容。然后只需使用下面这段代码，您可以选择想要搜索的字段，以及想要排除的文档类型（doc type）。

// Term to search for (the original snippet was missing the terminating semicolon here).
string term = "test";

// Resolve the searcher once and reuse it for both building and running the query.
var externalSearcher = ExamineManager.Instance.SearchProviderCollection["ExternalSearcher"];

// Match the term in any of the listed fields, excluding the listed node types and any
// node whose "excludeFromSearch" field is "1".
var criteria = externalSearcher.CreateSearchCriteria();
var crawl = criteria.GroupedOr(new string[] { "nodeName", "pageTitle", "metaDescription", "metaKeywords" }, term)
                .Not().Field("nodeTypeAlias", "GlobalSettings")
                .Not().Field("nodeTypeAlias", "Error")
                .Not().Field("nodeTypeAlias", "File")
                .Not().Field("nodeTypeAlias", "Folder")
                .Not().Field("nodeTypeAlias", "Image")
                .Not().Field("excludeFromSearch", "1")
                .Compile();

ISearchResults SearchResults = externalSearcher.Search(crawl);

// Container for the results; this snippet creates it but does not fill it.
IList<JsonSearchResult> results = new List<JsonSearchResult>();

希望这是有道理的。

Answer 3

我尝试过如下使用Examine：

// Raw Lucene query string (fuzzy match on SearchField). It is not used by the Examine
// criteria built below in this snippet.
SearchQuery = string.Format("+{0}:{1}~", SearchField, criteria);

// Resolve the searcher a single time and use it to build and execute the query.
var externalSearcher = ExamineManager.Instance.SearchProviderCollection["ExternalSearcher"];
var Criteria = externalSearcher.CreateSearchCriteria();

// Search the body and the node name, skipping hidden nodes and images.
var searchFields = new string[] { "bodyContent", "nodeName" };
var crawl = Criteria
    .GroupedOr(searchFields, criteria)
    .Not().Field("umbracoNaviHide", "1")
    .Not().Field("nodeTypeAlias", "Image")
    .Compile();

IEnumerable<Examine.SearchResult> SearchResults1 = externalSearcher.Search(crawl);

我使用了两种方法突出显示如下文字，但这些方法效率不高!!!我有一些没有突出显示任何文字的链接。

        /// <summary>
        /// Returns the best-matching fragments of <paramref name="value"/> for the given raw
        /// Lucene query, formatted with <c>HighlightFormatter</c> and joined by <c>Separator</c>.
        /// </summary>
        /// <param name="value">Text to highlight.</param>
        /// <param name="highlightField">Name of the field the query is parsed against and the text is tokenized for.</param>
        /// <param name="searcher">Searcher used to rewrite the parsed query before scoring.</param>
        /// <param name="luceneRawQuery">Raw Lucene query string.</param>
        /// <returns>The highlighted fragments, or an empty string when there is nothing to highlight.</returns>
        /// <exception cref="System.InvalidOperationException">No query parser could be obtained for the field.</exception>
        public string GetHighlight(string value, string highlightField, BaseLuceneSearcher searcher, string luceneRawQuery)
    {
        // Without text or a query there is nothing to highlight; StringReader would throw on a null value.
        if (string.IsNullOrEmpty(value) || string.IsNullOrWhiteSpace(luceneRawQuery))
        {
            return string.Empty;
        }

        var parser = GetQueryParser(highlightField);
        if (parser == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException on the next line.
            throw new System.InvalidOperationException("No query parser is available for field '" + highlightField + "'.");
        }

        var query = parser.Parse(luceneRawQuery);
        var scorer = new QueryScorer(searcher.GetSearcher().Rewrite(query));

        var highlighter = new Highlighter(HighlightFormatter, scorer);

        var tokenStream = HighlightAnalyzer.TokenStream(highlightField, new StringReader(value));
        return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
    }
    /// <summary>
    /// Creates a query parser for <paramref name="highlightField"/> using the configured
    /// Lucene version and <c>HighlightAnalyzer</c>.
    /// </summary>
    /// <param name="highlightField">Default field the parser applies to unqualified terms.</param>
    /// <returns>A new parser; never <c>null</c>.</returns>
    protected QueryParser GetQueryParser(string highlightField)
    {
        // The previous version returned null whenever QueryParsers.ToString() contained the
        // field name, and the caller dereferences the result immediately.
        // NOTE(review): QueryParsers looks like it was meant to be a per-field parser cache;
        // its type is not visible here, so no caching is attempted - confirm and reinstate if wanted.
        return new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
    }

如果你有在Examine中实现高亮显示且效果很好的示例，我将不胜感激。

从umbraco中搜索中排除宏

3 个答案: