下面是我的XML。我想获取在 15/02/2012 到 24/02/2012 日期范围内发布过标题的作者,并按各自的标题数量从高到低排序。
<entries>
<entry>
<id>1</id>
<published>23/02/2012</published>
<title>Title 1</title>
<content type="html">This is title one</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>2</id>
<published>22/02/2012</published>
<title>Title 2</title>
<content type="html">This is title two</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>3</id>
<published>21/02/2012</published>
<title>Title 3</title>
<content type="html">This is title three</content>
<author>
<name>Rob</name>
</author>
</entry>
<entry>
<id>4</id>
<published>20/02/2012</published>
<title>Title 4</title>
<content type="html">This is title four</content>
<author>
<name>Bob</name>
</author>
</entry>
<entry>
<id>5</id>
<published>19/02/2012</published>
<title>Title 1</title>
<content type="html">This is title five</content>
<author>
<name>Pankaj</name>
</author>
</entry>
</entries>
我希望通过XQuery得到如下输出:
<?xml version="1.0" encoding="UTF-8"?>
<results>
<result>
<author>
<name>Pankaj</name>
</author>
<numberOfTitles>3</numberOfTitles>
</result>
<result>
<author>
<name>Rob</name>
</author>
<numberOfTitles>1</numberOfTitles>
</result>
<result>
<author>
<name>Bob</name>
</author>
<numberOfTitles>1</numberOfTitles>
</result>
</results>
请帮帮我..
答案 0 :(得分:4)
这是我的解决方案:
(:
  Counts, per author, the entries published within the inclusive window
  2012-02-15 .. 2012-02-24 and emits one <result> per author, ordered by
  that count from highest to lowest.

  Requires XQuery 3.0 (uses the "group by" clause). Entries are read from
  the context document via //entry.
:)
<results>{
  for $entry in //entry
  (: <published> holds dd/MM/yyyy; reversing the '/'-separated parts and
     joining them with '-' yields the yyyy-MM-dd form that xs:date accepts. :)
  let $date := xs:date(string-join(reverse(tokenize($entry/published, '/')), '-')),
      (: Key on the <name> child only. The string value of the whole <author>
         element would also contain any whitespace text nodes around <name>
         on processors that preserve them, and that text would then end up
         in both the grouping key and the emitted <name>. :)
      $author := $entry/author/name/string()
  where xs:date('2012-02-15') le $date and $date le xs:date('2012-02-24')
  group by $author
  (: After "group by", $entry is the sequence of all entries in the group. :)
  let $titles := count($entry)
  order by $titles descending
  return <result>{
    <author>
      <name>{$author}</name>
    </author>,
    <numberOfTitles>{$titles}</numberOfTitles>
  }</result>
}</results>
使用BaseX执行时,会产生正确的结果。
它使用了XQuery 3.0的特性(例如 group by),否则写起来会更复杂。我不知道MarkLogic是否支持这一点。
答案 1 :(得分:4)
此XQuery 1.0解决方案可由任何兼容的XQuery 1.0处理器执行:
注意:不使用 group by,也不使用 distinct-values()。
(:
  XQuery 1.0 only: no "group by" and no distinct-values().
  Keeps the entries whose <published> date lies in the inclusive window
  2012-02-15 .. 2012-02-24, then emits one <result> per distinct author
  name, ordered by the number of matching entries, highest first.
:)
<results>
{
  let $inRange :=
        for $e in /*/entry
        (: dd/MM/yyyy -> yyyy-MM-dd so that xs:date can parse it :)
        let $when := xs:date(string-join(reverse(tokenize($e/published, '/')), '-'))
        where xs:date('2012-02-15') le $when and $when le xs:date('2012-02-24')
        return $e,
      $names := $inRange/author/name
  return
    for $name at $pos in $names
    (: Positions of every <name> whose value equals this one. :)
    let $hits := index-of($names, $name)
    (: Keep a name only at its first occurrence, which de-duplicates the list. :)
    where $hits[1] eq $pos
    order by count($hits) descending
    return
<result>
<author>
{$name}
</author>
<numberOfTitles>
{count($hits)}
</numberOfTitles>
</result>
}
</results>
应用于提供的XML文档:
<entries>
<entry>
<id>1</id>
<published>23/02/2012</published>
<title>Title 1</title>
<content type="html">This is title one</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>2</id>
<published>22/02/2012</published>
<title>Title 2</title>
<content type="html">This is title two</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>3</id>
<published>21/02/2012</published>
<title>Title 3</title>
<content type="html">This is title three</content>
<author>
<name>Rob</name>
</author>
</entry>
<entry>
<id>4</id>
<published>20/02/2012</published>
<title>Title 4</title>
<content type="html">This is title four</content>
<author>
<name>Bob</name>
</author>
</entry>
<entry>
<id>5</id>
<published>19/02/2012</published>
<title>Title 1</title>
<content type="html">This is title five</content>
<author>
<name>Pankaj</name>
</author>
</entry>
</entries>
生成想要的正确结果:
<?xml version="1.0" encoding="UTF-8"?>
<results>
<result>
<author>
<name>Pankaj</name>
</author>
<numberOfTitles>3</numberOfTitles>
</result>
<result>
<author>
<name>Rob</name>
</author>
<numberOfTitles>1</numberOfTitles>
</result>
<result>
<author>
<name>Bob</name>
</author>
<numberOfTitles>1</numberOfTitles>
</result>
</results>
答案 2 :(得分:4)
以下是MarkLogic特有的解决方案,使用映射(map)高效地实现分组。输入XML已声明为 $INPUT,但您可以改为调用 doc() 或任何其他访问函数来获取它。
我去年在博客文章中探讨过这个话题:http://blakeley.com/blogofile/archives/560/
(:
  MarkLogic-specific: counts titles per author inside the inclusive window
  2012-02-15 .. 2012-02-24, using a map as the accumulator so the entries
  are walked only once. Emits one <result> per author, highest count first.

  $INPUT must be bound by the caller; the query reads its <entry> children.
:)
element results {
  let $m := map:map()
  let $start := xs:date('2012-02-15')
  let $stop := xs:date('2012-02-24')
  (: This binding exists only to run the map:put calls that fill $m;
     $group itself is never read afterwards. :)
  let $group :=
    for $entry in $INPUT/entry
    let $key := $entry/author/name/string()
    (: <published> is written as dd/MM/yyyy. :)
    let $date := xs:date(xdmp:parse-yymmdd("dd/MM/yyyy", $entry/published))
    where $date ge $start and $date le $stop
    (: (map:get(...), 0)[1] falls back to 0 the first time a key is seen. :)
    return map:put($m, $key, 1 + (map:get($m, $key), 0)[1])
  for $key in map:keys($m)
  let $count := map:get($m, $key)
  (: The requirement is highest to lowest; a bare "order by" sorts ascending. :)
  order by $count descending
  return element result {
    element author { element name { $key }},
    element numberOfTitles { $count } } }
答案 3 :(得分:2)
以下内容适用于大多数处理器。可以在MarkLogic中进行更高效的查询,但这可以帮助您入门。
(:
  Portable XQuery 1.0 version built on distinct-values().
  Counts, per author, the entries published within the inclusive window
  2012-02-15 .. 2012-02-24 and emits one <result> per author, ordered by
  that count from highest to lowest. <numberOfTitles> is emitted as a
  sibling of <author>, matching the requested output.

  The sample input is inlined so the query runs without an external document.
:)
let $doc := <entries>
<entry>
<id>1</id>
<published>23/02/2012</published>
<title>Title 1</title>
<content type="html">This is title one</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>2</id>
<published>22/02/2012</published>
<title>Title 2</title>
<content type="html">This is title two</content>
<author>
<name>Pankaj</name>
</author>
</entry>
<entry>
<id>3</id>
<published>21/02/2012</published>
<title>Title 3</title>
<content type="html">This is title three</content>
<author>
<name>Rob</name>
</author>
</entry>
<entry>
<id>4</id>
<published>20/02/2012</published>
<title>Title 4</title>
<content type="html">This is title four</content>
<author>
<name>Bob</name>
</author>
</entry>
<entry>
<id>5</id>
<published>19/02/2012</published>
<title>Title 1</title>
<content type="html">This is title five</content>
<author>
<name>Pankaj</name>
</author>
</entry>
</entries>
(: Inclusive date window the entries must fall in. :)
let $start := xs:date('2012-02-15')
let $stop := xs:date('2012-02-24')
(: <published> holds dd/MM/yyyy; reversing the '/'-separated parts and
   joining them with '-' yields the yyyy-MM-dd form that xs:date accepts. :)
let $inRange :=
  for $entry in $doc/entry
  let $date := xs:date(string-join(reverse(tokenize($entry/published, '/')), '-'))
  where $start le $date and $date le $stop
  return $entry
return
<results>
{
for $author in distinct-values($inRange/author/name/string())
(: Counted once, then reused for both sorting and output. :)
let $count := count($inRange[author/name/string() eq $author])
order by $count descending
return
<result>
<author>
<name>{$author}</name>
</author>
<numberOfTitles>{$count}</numberOfTitles>
</result>
}
</results>
答案 4 :(得分:2)
这是另一个类似于 Leo Wörteler 的解决方案:
(:~
 : Converts a date written as dd/MM/yyyy into an xs:date.
 :
 : @param $origDate the date text, with its parts separated by '/'
 : @return the xs:date parsed from the parts in reverse order, joined by '-'
 :)
declare function local:FormatDate($origDate as xs:string) as xs:date
{
  let $parts := tokenize($origDate, '/')
  let $iso := string-join(reverse($parts), '-')
  return xs:date($iso)
};
(:
  Counts, per author, the entries published within the inclusive window
  2012-02-15 .. 2012-02-24 and emits one <result> per author, ordered by
  that count from highest to lowest. Authors with no entry inside the
  window are omitted. Relies on local:FormatDate to parse <published>.
:)
<results>
{
  (: The window is the same for every author, so it is bound once up front. :)
  let $startDate := xs:date('2012-02-15')
  let $endDate := xs:date('2012-02-24')
  for $author in distinct-values(/entries/entry/author/name)
  (: Computed once per author and reused for filtering, sorting and output. :)
  let $count := count(
    for $entry in /entries/entry[author/name = $author]
    let $published := local:FormatDate($entry/published)
    where $startDate le $published and $published le $endDate
    return $entry
  )
  (: distinct-values() above sees every author in the document, including
     those whose entries all fall outside the window. :)
  where $count gt 0
  order by $count descending
  return
    <result>
      <author>
        <name>{$author}</name>
      </author>
      <numberOfTitles>{$count}</numberOfTitles>
    </result>
}
</results>
答案 5 :(得分:2)
+1。其他解决方案把 count(/entry/author[$name=xx]) 子句或其他 XPath 嵌套在 FLWOR 中,这实际上是一个嵌套循环。嵌套循环会导致 O(N^2) 的性能,在测试数据上可能表现良好,但随着数据量增加就会变慢。