我正在尝试编写一个 XQuery 函数,按分隔符对字符串进行分词(tokenize),同时忽略嵌套括号表达式内部的分隔符,例如:
tokenizeOutsideBrackets("1,(2,3)" , ",") => ( "1" , "(2,3)" )
tokenizeOutsideBrackets("1,(2,(3,4))" , ",") => ( "1" , "(2,(3,4))" )
tokenizeOutsideBrackets("1,(2,(3,(4,5)))" , ",") => ( "1" , "(2,(3,(4,5)))" )
tokenizeOutsideBrackets("1,(2,(3,4),5),6" , ",") => ( "1" , "(2,(3,4),5)" , "6" )
如果可以使用递归正则表达式或命令式语言,这个问题会非常简单;但我很难在 XQuery 中找到一种简洁的实现方法。
谢谢!
答案 0 :(得分:1)
这个XQuery表达式:
(: Splits the sample string in two steps:
   1. replace() matches each term -- either a run of digits or a span that
      starts at '(' and ends at ')' -- together with an optional trailing
      comma, and rewrites it as the term (group 1) followed by ';'.
   2. tokenize() then splits the rewritten string on ';'.
   Because '.*' is greedy, the bracketed alternative runs from the first '('
   to the LAST ')' in the string, so two sibling groups such as
   '(2,3),(4,5)' are captured as a single term.
   Every match appends ';', so the rewritten string ends with ';' and
   fn:tokenize yields a final zero-length token.
   The input must not itself contain ';'. :)
tokenize(replace('1,(2,(3,4),5),6','([0123456789]+|\(.*\))(,)?','$1;'),';')
输出:
1 (2,(3,4),5) 6
更新:如果输入是像 '1,(2,3),(4,5),6' 这样含有多个并列括号组的字符串,
上面的正则表达式就不适用了(贪婪的 \(.*\) 会把 (2,3),(4,5) 整体匹配为一项),此时你需要为下面的语法编写一个解析器:
exp ::= term ( ',' term ) *
term ::= num | '(' exp ')'
num ::= ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ) +
答案 1 :(得分:0)
一种做法是先按分隔符拆分,然后把括号不平衡的标记与其右侧相邻的标记重新拼接起来。
以下代码可以得到你想要的结果。它先用 fn:tokenize 进行拆分,然后以(尾)递归方式处理得到的标记:当前一个标记中“(”与“)”的数量不相等时,就把当前标记拼接到它后面。这种方法存在一些缺陷:它只比较括号的数量,并不真正检查左右括号是否正确配对;并且 $delimiter 在拆分时被当作正则表达式模式,在拼接时却被当作字面量。要正确处理这些情况还需要更多代码,但思路大致如此。
declare function local:tokenizeOutsideBrackets($string, $delimiter)
{
  (: Splits $string on every occurrence of $delimiter that lies outside
     round brackets.
     $string    - the text to split.
     $delimiter - the separator, treated as a LITERAL string.
     Returns the sequence of tokens; delimiters inside bracketed
     sub-expressions are kept as part of their token.

     fn:tokenize interprets its second argument as a regular expression,
     while local:joinBrackets re-inserts $delimiter literally when it glues
     tokens back together. Regex metacharacters are therefore escaped here
     so that both steps agree on what the delimiter is (e.g. "." or "|"). :)
  let $literalPattern :=
    replace($delimiter,
            '(\.|\[|\]|\\|\||\-|\^|\$|\?|\*|\+|\{|\}|\(|\))',
            '\\$1')
  return local:joinBrackets(tokenize($string, $literalPattern), $delimiter, ())
};
declare function local:joinBrackets($tokens, $delimiter, $result)
{
  (: Folds $tokens into $result from left to right.
     $tokens    - tokens still to be processed.
     $delimiter - string re-inserted between two tokens that are glued together.
     $result    - tokens accumulated so far (pass the empty sequence initially).
     When the most recently accumulated token holds a different number of
     "(" and ")" characters, the next token is appended to it (separated by
     $delimiter); otherwise the next token starts a new entry.
     Only the bracket counts are compared, not their order. :)
  if (exists($tokens)) then
    let $head := $tokens[1]
    let $pending := $result[last()]
    (: Bracket counts of the last accumulated token; both are 0 when
       $result is still empty. :)
    let $opens := string-length($pending) - string-length(translate($pending, "(", ""))
    let $closes := string-length($pending) - string-length(translate($pending, ")", ""))
    let $merged :=
      if ($opens ne $closes) then
        (: Last token is still open: extend it with the next token. :)
        (subsequence($result, 1, count($result) - 1),
         concat($pending, $delimiter, $head))
      else
        ($result, $head)
    return local:joinBrackets(subsequence($tokens, 2), $delimiter, $merged)
  else
    $result
};
答案 2 :(得分:0)
我自己尝试了一番,下面的函数似乎可行,不过我总觉得应该有更简单的方法。
此代码使用 functx:index-of-string 函数查找所有分隔符的位置,然后依次检查每个位置,找出第一个满足“其左侧全部内容中左括号与右括号数量相等”的分隔符。找到之后,再对该分隔符右侧的剩余字符串递归执行同样的处理。
declare function local:tokenizeOutsideBrackets(
  $arg as xs:string?,
  $delimiter as xs:string) as xs:string*
{
  (: Splits $arg on occurrences of $delimiter that are not enclosed in
     round brackets.
     $arg       - the string to split; the empty sequence or '' yields ().
     $delimiter - the literal separator string.
     Returns the tokens in order. The first token is the shortest prefix
     that ends at a delimiter (or at the end of the string) and passes
     local:hasMatchedBrackets; the remainder after that delimiter is then
     processed recursively.

     An empty $delimiter is never searched for (it would match everywhere
     and the recursion would not terminate); $arg is returned unsplit.
     If no prefix has matched brackets (unbalanced input such as "(1,2"),
     the remaining text is returned as one token instead of being
     truncated. :)
  if ($delimiter ne '' and contains($arg, $delimiter))
  then
    (: positions of all delimiters, plus one past the end of the string so
       that the whole string is also tried as a candidate :)
    let $delimiterPositions := (
      functx:index-of-string($arg, $delimiter),
      string-length($arg) + 1
    )
    (: first prefix, ending just before a delimiter, with matched brackets :)
    let $firstFragment :=
      (
        for $endPos in $delimiterPositions
        let $candidateString := substring($arg, 1, $endPos - 1)
        where local:hasMatchedBrackets($candidateString)
        return $candidateString
      )[1]
    return
      if (empty($firstFragment))
      then
        (: brackets never balance: keep the rest intact as a single token :)
        $arg
      else
        (
          $firstFragment,
          (: continue with everything after the delimiter that ended
             the first fragment :)
          local:tokenizeOutsideBrackets(
            substring(
              $arg,
              string-length($firstFragment) + string-length($delimiter) + 1
            ),
            $delimiter
          )
        )
  else if ($arg = '') then () else ($arg)
};
declare function local:hasMatchedBrackets($arg as xs:string) as xs:boolean
{
  (: True when $arg contains exactly as many "(" characters as ")"
     characters. Only the two counts are compared; the order in which the
     brackets appear is not checked. :)
  let $withoutOpening := translate($arg, '(', '')
  let $withoutClosing := translate($arg, ')', '')
  (: Removing n characters shortens the string by n, so equal remaining
     lengths mean equal bracket counts. :)
  return string-length($withoutOpening) = string-length($withoutClosing)
};