扩展树缓存已满错误需要调整查询

时间:2018-08-24 10:39:01

标签: marklogic marklogic-8 query-tuning

说明:

  • set { _selectedProject = value; YourCommand.Execute(null); } 将包含我必须查看的字符串序列
  • $enumValues将具有来自XML(用于循环)的元素值,即我必须按照上述保持的顺序匹配的字符串
  • 如果不匹配,我必须保留几个元素值并返回。

以下所有三项尝试均使我扩展了树缓存完整错误。有大约470000资产,即我正在查询的XML。

如何调整这些查询以避免扩展的树缓存错误?

方法1:

$assetSubGroup

方法2:

let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"

let $report-uri := concat($output-dir, $query-name, "_report0.txt")

let $enumValues := (:all sequence of strings goes here :)

let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))

let $result1 := concat('asset-id' , "|",  'upi',  "|", 'assetSubGroup',  "|",  'asset-type',  "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier',  "|",  'mm-project-id',  "|" , 'sap-project-id' , "
")
let $result2 :=
  for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
    let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
    let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
    let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
    let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
    let $originator := $each-search-copy/biblioCore/originator[1]/text()
    let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
    let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
    let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
    let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
    let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
    let $flag := map:keys($map2 - $map1)
    return
        if ($flag)
            then(

                     concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")
                 )
            else (
                  if($assetSubGroup) then() 
                  else (     

                        concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")

            ))
let $result3 := ($result1, $result2)    
return xdmp:save($report-uri, text{$result3}), xdmp:elapsed-time()

方法3:

let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report1.txt")

let $enumValues := (:all string value sequence goes here that has to match:)

let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))

let $result1 :=( concat('asset-id' , "|",  'upi',  "|", 'assetSubGroup',  "|",  'asset-type',  "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier',  "|",  'mm-project-id',  "|" , 'sap-project-id' , "
"),

  for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
    let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
    let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
    let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
    let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
    let $originator := $each-search-copy/biblioCore/originator[1]/text()
    let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
    let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
    let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
    let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
    let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
    let $flag := map:keys($map2 - $map1)
    return
        if ($flag)
            then(

                     concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")
                 )
            else (
                  if($assetSubGroup) then() 
                  else (     

                        concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")

            ))
            )

return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()

2 个答案:

答案 0 :(得分:2)

方法中最大的罪魁祸首是您试图将整个结果合并为一个字符串。在将结果插入文本节点之前,还需要将其捕获到变量中。

以下方法会更好,因为它允许MarkLogic在写入磁盘时流式传输数据:

is/2

HTH!

答案 1 :(得分:1)

如果发现编写“流式”查询时遇到麻烦,另一种选择是使用批处理工具,例如CORB2,该工具使用多个线程为每个文档执行一个模块以产生其输出。在一个单独的事务中并将结果收集到最终输出文件中。通过将工作分解为单独的事务,您不必担心扩展的树缓存错误或超时,并且可以调整线程数以并行执行更多工作,并比单个查询执行更快地完成工作。

用于生成|带分隔符的文本文件的示例CORB2选项文件如下所示(您需要根据环境调整 XCC-CONNECTION-URI 值):< / p>

XCC-CONNECTION-URI=xcc://user:password@host:port
THREAD-COUNT=8
URIS-MODULE=selector.xqy|ADHOC
PROCESS-MODULE=process.xqy|ADHOC
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
EXPORT-FILE-DIR=D:\output\
EXPORT-FILE-NAME=get-asset-sub-group-values_report.txt
PRE-BATCH-TASK=com.marklogic.developer.corb.PreBatchUpdateFileTask
EXPORT-FILE-TOP-CONTENT=asset-id|upi|assetSubGroup|asset-type|asset-sub-type|originator|originator-identifier|mm-project-id|sap-project-id
BATCH-URI-DELIM=|

创建URI选择器模块selector.xqy,该模块将找到所有要处理的URI:

xquery version "1.0-ml";
let $uris := cts:uris("", (), cts:and-query((
  cts:collection-query("metadata-search"), 
  cts:element-value-query(xs:QName("AssetID"), "*")
)) )
return (fn:count($uris), $uris)

创建将为每个URI调用的流程模块process.xqy

xquery version "1.0-ml";

declare variable $URI external;

let $each-search-copy := fn:doc($URI)/metadata

let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $flag := ($assetSubGroup eq $enumValues)
return
    if ($flag)
        then()
        else (
              (: NOTE you could use string-join() instead of concat()

                 string-join(($asset-id, $upi, $assetSubGroup, $asset-type, $asset-sub-type, $originator, $originator-identifier, $mm-project-id, $sap-project-id), "|")  

              :)
              concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id)
             ))

像这样调用CORB作业(调整XCC和CORB jar和属性文件的路径和文件名):

java -server -cp .:marklogic-xcc-8.0.8.jar:marklogic-corb-2.4.1.jar
    -DOPTIONS-FILE=myjob.properties com.marklogic.developer.corb.Manager

或者,如果您使用的是ml-gradleuse the corb task