Marklogic搜索语法问题

时间:2017-01-18 02:18:52

标签: marklogic marklogic-8

我的印象是,当搜索短语用双引号括起来时,会进行精确匹配。但我还是得到了部分匹配的结果(尽管得分很低)。我原本期望只返回完全匹配的结果。以下是我的示例代码,我遗漏了什么?

xquery version "1.0-ml";
(:
 : Example script: runs search:search with a query string and an inline
 : options node, and returns the search response for a single result
 : (start = 1, page length = 1).
 :)
declare namespace html = "http://www.w3.org/1999/xhtml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
(: NOTE(review): functx is imported but not referenced anywhere in the visible script. :)
import module namespace functx = "http://www.functx.com"  at "/MarkLogic/functx/functx-1.0-doc-2007-01.xqy";

(:
 : Query text: a phrase, a "context" constraint and a "sort" operator state,
 : joined with AND.
 : NOTE(review): the phrase is delimited by typographic quotes (U+201C / U+201D),
 : not by the ASCII double quote (U+0022). Presumably the default search grammar
 : only treats the ASCII character as a phrase delimiter - confirm against the
 : search:parse output for this string.
 :)
let $q := '(“protein degradation”) AND ((context:PCS)) AND (sort:date_desc)'

(:
 : Search options:
 :   - additional-query: collection query on http://XXXXX/type/envelope
 :   - operator "sort": states date_desc / date_asc, ordering on the
 :     "upload_date" field as xs:dateTime
 :   - constraints "context" and "type": string range constraints (faceted) on
 :     elements in the http://XXXXX/metadata namespace
 :   - term options: case-, punctuation- and whitespace-insensitive, wildcarded
 :   - search-option: unfiltered
 :)
let $options := 
  <options xmlns="http://marklogic.com/appservices/search">
    <additional-query>
        <cts:collection-query xmlns:cts="http://marklogic.com/cts">
            <cts:uri>http://XXXXX/type/envelope</cts:uri>
        </cts:collection-query>
    </additional-query>
    <operator name="sort">
        <state name="date_desc">
            <sort-order type="xs:dateTime" direction="descending">
                <field name="upload_date"/>
            </sort-order>
        </state>
        <state name="date_asc">
            <sort-order type="xs:dateTime" direction="ascending">
                <field name="upload_date"/>
            </sort-order>
        </state>
    </operator>
    <constraint name="context">
        <range type="xs:string" facet="true">
            <element name="context" ns="http://XXXXX/metadata"/>
            <facet-option>frequency-order</facet-option>
            <facet-option>descending</facet-option>
        </range>
    </constraint>
    <constraint name="type">
        <range type="xs:string" facet="true">
            <element name="type" ns="http://XXXXX/metadata"/>
            <facet-option>frequency-order</facet-option>
            <facet-option>descending</facet-option>
        </range>
    </constraint>
    <term>
        <term-option>case-insensitive</term-option>
        <term-option>punctuation-insensitive</term-option>
        <term-option>whitespace-insensitive</term-option>
        <term-option>wildcarded</term-option>
    </term>
     <search-option>unfiltered</search-option>
</options>

(: Paging: start at the first result and return one result per page. :)
let $start := 1
let $page-length :=1

(: Run the search and return the full search response. :)
let $result := search:search($q, $options, $start, $page-length) 
return $result

以下是我得到的结果。我很困惑:我不明白为什么下面这条结果会被当作命中。

<search:result index="1" uri="/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml" path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")" score="58624" confidence="0.329381" fitness="0.5856407">
  <search:snippet>
    <search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:metadata/*:context">
      <search:highlight>PCS</search:highlight>
    </search:match>
    <search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[1]">
Analysis of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in LCIG RD12714 ra-rd12714-hydrazine</search:match>
    <search:match path="fn:doc("/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml")/*:document-envelope/*:extractedText/*:html/*:body/*:p[9]">...of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in...</search:match>
  </search:snippet>
</search:result>

请注意,上面的结果只匹配了<search:highlight>Degradation</search:highlight>。既然我们进行的是精确搜索,为什么会出现这种部分匹配?

----- 补充:search:parse 的输出 -----

<cts:and-query xmlns:cts="http://marklogic.com/cts" xmlns:search="http://marklogic.com/appservices/search">
  <cts:word-query>
    <cts:text xml:lang="en">“protein</cts:text>
    <cts:option>case-insensitive</cts:option>
    <cts:option>punctuation-insensitive</cts:option>
    <cts:option>whitespace-insensitive</cts:option>
    <cts:option>wildcarded</cts:option>
  </cts:word-query>
  <cts:word-query>
    <cts:text xml:lang="en">degradation”</cts:text>
    <cts:option>case-insensitive</cts:option>
    <cts:option>punctuation-insensitive</cts:option>
    <cts:option>whitespace-insensitive</cts:option>
    <cts:option>wildcarded</cts:option>
  </cts:word-query>
  <cts:element-range-query operator="=">
    <cts:element xmlns:_1="http://XXXXX/metadata">_1:context</cts:element>
    <cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">PCS</cts:value>
    <cts:option>collation=http://marklogic.com/collation/</cts:option>
  </cts:element-range-query>
  <cts:annotation operator-ref="sort" state-ref="date_desc">
  </cts:annotation>
</cts:and-query>

2 个答案:

答案 0 :(得分:2)

我认为问题出在弯引号(“ ”,即排版引号)上,它们不是普通的 ASCII 双引号("):

import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:parse('"protein degradation"')

给出:

<cts:word-query xmlns:cts="http://marklogic.com/cts">
  <cts:text xml:lang="en">protein degradation</cts:text>
</cts:word-query>

而:

import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:parse('“protein degradation”')

给出:

<cts:and-query xmlns:cts="http://marklogic.com/cts">
  <cts:word-query>
    <cts:text xml:lang="en">“protein</cts:text>
  </cts:word-query>
  <cts:word-query>
    <cts:text xml:lang="en">degradation”</cts:text>
  </cts:word-query>
</cts:and-query>

答案 1 :(得分:0)

我建议您使用名为 "exact" 的词项选项(term-option)。

来自文档:"exact":精确匹配查询。它是 "case-sensitive"、"diacritic-sensitive"、"punctuation-sensitive"、"whitespace-sensitive"、"unstemmed" 和 "unwildcarded" 的简写。