说明:
set
{
_selectedProject = value;
YourCommand.Execute(null);
}
$enumValues 将包含我需要在其中查找的字符串序列。
$assetSubGroup 将保存(在 for 循环中)从 XML 取出的元素值,即我需要在上述序列中进行匹配的字符串。以下三种尝试都报出了“扩展树缓存已满”(Expanded tree cache full)错误。我要查询的资产(即 XML 文档)大约有 470000 个。
如何调整这些查询以避免扩展的树缓存错误?
方法1:
$assetSubGroup
方法2:
(:
 : Attempt 2: builds a pipe-delimited report and saves it to
 : D:\output\get-asset-sub-group-values_report0.txt.
 : A row is emitted for every matched document whose assetSubGroup is either
 : not one of $enumValues or is absent altogether.
 :)
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report0.txt")
let $enumValues := (:all sequence of strings goes here :)
(: Map keyed by every allowed value, used below for the set difference. :)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
(: Header row of the report. :)
let $result1 := concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " ")
(: Data rows: one FLWOR iteration per /metadata root returned by the search. :)
let $result2 :=
(: NOTE(review): "*" is presumably meant as a wildcard matching any AssetID value; whether it is treated as one depends on the database wildcard settings - confirm. :)
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
(: Map holding this document's assetSubGroup (empty map when the element is absent). :)
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
(: Keys present in $map2 but not in $map1, i.e. the value when it is not an allowed one. :)
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
(: assetSubGroup is present but not among the allowed values: report the asset. :)
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
(: assetSubGroup is present and allowed: nothing to report. :)
if($assetSubGroup) then()
else (
(: assetSubGroup is missing: report the asset as well. :)
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
(: The header and every data row are bound to a single variable before being written out. :)
let $result3 := ($result1, $result2)
(: Saves the report as one text node; xdmp:elapsed-time() is returned as a second result item. :)
return xdmp:save($report-uri, text{$result3}), xdmp:elapsed-time()
方法3:
(:
 : Attempt 3: same report as attempt 2, saved to
 : D:\output\get-asset-sub-group-values_report1.txt, but the header row and the
 : data rows are built as one sequence bound to a single variable ($result1).
 :)
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report1.txt")
let $enumValues := (:all string value sequence goes here that has to match:)
(: Map keyed by every allowed value, used below for the set difference. :)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
(: First item is the header row; the FLWOR that follows contributes the data rows. :)
let $result1 :=( concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " "),
(: NOTE(review): "*" is presumably meant as a wildcard matching any AssetID value; whether it is treated as one depends on the database wildcard settings - confirm. :)
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
(: Map holding this document's assetSubGroup (empty map when the element is absent). :)
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
(: Keys present in $map2 but not in $map1, i.e. the value when it is not an allowed one. :)
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
(: assetSubGroup is present but not among the allowed values: report the asset. :)
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
(: assetSubGroup is present and allowed: nothing to report. :)
if($assetSubGroup) then()
else (
(: assetSubGroup is missing: report the asset as well. :)
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
)
(: Saves the report as one text node; xdmp:elapsed-time() is returned as a second result item. :)
return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()
答案 0 :(得分:2)
您的方法中最大的问题是试图把整个结果合并成一个字符串。此外,您在把结果放入文本节点之前,还先把它保存到了一个变量中。
以下方法会更好,因为它允许MarkLogic在写入磁盘时流式传输数据:
is/2
HTH!
答案 1 :(得分:1)
如果您发现编写“流式”查询有困难,另一种选择是使用批处理工具,例如 CORB2。该工具使用多个线程,在各自独立的事务中为每个文档执行一个模块以产生输出,并将结果汇总到最终的输出文件中。通过将工作拆分为多个独立的事务,您不必担心扩展树缓存错误或超时,还可以调整线程数以并行执行更多工作,从而比单个查询更快地完成任务。
用于生成以 | 分隔的文本文件的示例 CORB2 选项文件如下所示(您需要根据自己的环境调整 XCC-CONNECTION-URI 的值):
# CORB2 job options: export one pipe-delimited line per selected document
# into a single report file.
# Adjust the connection URI (user, password, host, port) for your environment.
XCC-CONNECTION-URI=xcc://user:password@host:port
THREAD-COUNT=8
URIS-MODULE=selector.xqy|ADHOC
PROCESS-MODULE=process.xqy|ADHOC
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
# Use forward slashes here: in a Java properties file a backslash is an escape
# character, and a trailing backslash continues the value onto the next line,
# which would swallow the EXPORT-FILE-NAME entry below.
EXPORT-FILE-DIR=D:/output/
EXPORT-FILE-NAME=get-asset-sub-group-values_report.txt
PRE-BATCH-TASK=com.marklogic.developer.corb.PreBatchUpdateFileTask
# Header row written at the top of the export file.
EXPORT-FILE-TOP-CONTENT=asset-id|upi|assetSubGroup|asset-type|asset-sub-type|originator|originator-identifier|mm-project-id|sap-project-id
BATCH-URI-DELIM=|
创建 URI 选择器模块 selector.xqy,该模块将找出所有需要处理的 URI:
xquery version "1.0-ml";
(:
 : URI selector: finds the URIs of all documents in the "metadata-search"
 : collection that match the AssetID element-value query, and returns the
 : number of URIs followed by the URIs themselves.
 :)
let $selection :=
  cts:and-query((
    cts:collection-query("metadata-search"),
    cts:element-value-query(xs:QName("AssetID"), "*")
  ))
let $matched-uris := cts:uris("", (), $selection)
return (fn:count($matched-uris), $matched-uris)
创建处理模块 process.xqy,它将针对每个 URI 被调用一次:
xquery version "1.0-ml";
(:
 : Process module: invoked once per document URI.
 : Returns one pipe-delimited line for the document when its assetSubGroup is
 : missing or is not one of $enumValues; returns nothing otherwise.
 :)
declare variable $URI external;
(: Allowed assetSubGroup values. Placeholder: replace the empty sequence with
   the full sequence of strings to match against, e.g. ("value-a", "value-b"). :)
declare variable $enumValues as xs:string* := ();

let $each-search-copy := fn:doc($URI)/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
(: General comparison (=): true when the value equals ANY item of $enumValues.
   The value comparison `eq` only accepts single items and raises an error when
   $enumValues holds more than one string. An absent assetSubGroup yields
   false, so documents without one are reported too. :)
let $flag := ($assetSubGroup = $enumValues)
return
  if ($flag)
  then ()
  else (
    (: NOTE: string-join() could be used instead of concat(), but empty values
       vanish when the argument sequence is flattened, which would shift the
       columns; wrap each item in fn:string() if you switch to it. :)
    concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id)
  )
像这样调用CORB作业(调整XCC和CORB jar和属性文件的路径和文件名):
# Run the CORB job on a single command line (on Windows, separate classpath entries with ';' instead of ':').
java -server -cp .:marklogic-xcc-8.0.8.jar:marklogic-corb-2.4.1.jar -DOPTIONS-FILE=myjob.properties com.marklogic.developer.corb.Manager
或者,如果您使用的是 ml-gradle,可以使用它提供的 corb 任务。