SPARQL地理空间查询(MarkLogic)

时间:2019-05-28 09:41:29

标签: sparql marklogic

这是上一个问题(here)的后续。有人指出,在SPARQL查询中应避免使用fn:doc()。但是,对于地理空间查询,除了下面显示的代码之外,我找不到其他解决方案。我也实际运行了此查询,它确实非常慢。对于更大的数据集,它会触及1小时的超时限制。

因此,我想问问是否有更好的方法为SPARQL实现地理空间查询?可以将GEOSPARQL与PREFIX spatial:<http://jena.apache.org/spatial#>一起使用吗?

xquery version "1.0-ml";
import module namespace sem = "http://marklogic.com/semantics" at "/MarkLogic/semantics.xqy";
import module namespace thsr="http://marklogic.com/xdmp/thesaurus" 
                             at "/MarkLogic/thesaurus.xqy";

(:
  Times a SPARQL query that selects every ?people / ?question pair linked by
  the </posted> predicate and keeps only the rows whose ?people document
  satisfies a path geospatial query on "/people_data/location"
  (circle of radius 10 around cts:point(59,28)).

  The FILTER opens each candidate with fn:doc(?people) and tests it with
  cts:contains; this is the pattern the question reports as very slow.

  The three trailing empty sequences leave the optional sem:sparql
  arguments unset.

  Returns: xdmp:elapsed-time() for the request.

  NOTE(review): $query is never referenced in the return clause; confirm the
  query is actually evaluated before trusting the reported time.
:)
let $query := sem:sparql(
'
PREFIX xs: <http://www.w3.org/2001/XMLSchema#>
PREFIX cts: <http://marklogic.com/cts#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX fn: <http://www.w3.org/2005/xpath-functions#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX db: <http://dbpedia.org/resource/>
PREFIX onto: <http://dbpedia.org/ontology/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xdmp: <http://marklogic.com/xdmp#>

SELECT *
WHERE{
?people </posted> ?question .
FILTER (cts:contains(fn:doc(?people), 
cts:path-geospatial-query("/people_data/location",  cts:circle(10, cts:point(59,28)))
)) .
}',
(),
(),
()
)

(: Only the elapsed time is reported; the query rows themselves are discarded. :)
return (xdmp:elapsed-time())

======= 更新 =======

示例数据

lexicon.people

{"lexicon.people":"/people/Aaren_DETERS/000000000055933"}
{"lexicon.people":"/people/Aaren_HOWK/000000000117433"}
{"lexicon.people":"/people/Aaren_HUSTEDT/000000000038649"}
{"lexicon.people":"/people/Aaren_SHUSTA/000000000123065"}
{"lexicon.people":"/people/Aaren_SIEBERS/000000000035010"}
{"lexicon.people":"/people/Aarika_BETHARD/000000000048955"}
{"lexicon.people":"/people/Aarika_CHO/000000000093078"}
{"lexicon.people":"/people/Aarika_EVORA/000000000117911"}
{"lexicon.people":"/people/Aarika_LUCKRITZ/000000000001593"}
{"lexicon.people":"/people/Aarika_MCALPHIN/000000000043365"}
{"lexicon.people":"/people/Aarika_PAET/000000000067579"}
{"lexicon.people":"/people/Aarika_SENGUN/000000000047752"}
{"lexicon.people":"/people/Aarika_WEDEMEYER/000000000022594"}
......

sparql.person

......
{"sparql.person":"/people/Aaren_CRIBLEZ/000000000087536", "sparql.dob":"1999-07-21T00:19:21"}
{"sparql.person":"/people/Aaren_DEBRITO/000000000049208", "sparql.dob":"2018-10-09T08:09:48"}
{"sparql.person":"/people/Aaren_DEMASTERS/000000000091082", "sparql.dob":"2014-08-21T06:43:44"}
{"sparql.person":"/people/Aaren_DETERS/000000000055933", "sparql.dob":"2011-09-11T02:44:22"}
{"sparql.person":"/people/Aaren_GARY/000000000100658", "sparql.dob":"1998-06-04T00:39:23"}
{"sparql.person":"/people/Aaren_HANNAWAY/000000000045087", "sparql.dob":"2002-06-17T05:45:15"}
{"sparql.person":"/people/Aaren_HOWK/000000000117433", "sparql.dob":"2002-06-21T06:00:34"}
{"sparql.person":"/people/Aaren_HUSTEDT/000000000038649", "sparql.dob":"2018-06-07T14:56:39"}
{"sparql.person":"/people/Aaren_JURICH/000000000039301", "sparql.dob":"2003-07-14T16:20:05"}
{"sparql.person":"/people/Aaren_KRACK/000000000101407", "sparql.dob":"2014-03-19T06:25:39"}
{"sparql.person":"/people/Aaren_MACCONNELL/000000000053205", "sparql.dob":"1995-06-21T16:15:53"}
.....

使用查询

import module namespace op="http://marklogic.com/optic" at "/MarkLogic/optic.xqy";
(:
  Joins document URIs that satisfy a geospatial constraint with the
  ?person / ?dob rows produced by a SPARQL query.

  - URI rows come from the URI lexicon (column "people", qualifier "lexicon")
    and are restricted by a path geospatial query on "people_data/location".
  - SPARQL rows (qualifier "sparql") come from the </has_dob> predicate.
  - The inner join matches lexicon.people against sparql.person.
:)
let $geo-filter :=
  cts:path-geospatial-query(
    "people_data/location",
    cts:circle(7500, cts:point(89.39101918779838, 51.97989163203445)),
    "type=long-lat-point"
  )
let $uri-rows := op:from-lexicons(map:entry("people", cts:uri-reference()), "lexicon")
let $nearby-people := op:where($uri-rows, $geo-filter)
let $dob-rows := op:from-sparql('SELECT * WHERE {?person </has_dob> ?dob. }', "sparql")
let $same-person := op:on(op:view-col("lexicon", "people"), op:view-col("sparql", "person"))
(: The SPARQL rows are the left side of the join, as in the original pipeline. :)
return op:result(op:join-inner($dob-rows, $nearby-people, $same-person))

1 个答案:

答案 0 :(得分:1)

我在这里看到两个选项:

  • 您可以使用MarkLogic内置的地理空间功能直接从SPARQL内部查找地理空间重叠,最好是比较RDF属性,而不是比较路径索引中的值(仍然不理想)
  • 更好:预取与您的地理空间约束匹配的文档列表,并将其作为约束输入到SPARQL中(这应该是高性能的)

大致如下:

(:
  Pre-fetches the URIs of documents matching the geospatial constraint, then
  passes them to the SPARQL query as the ?people binding so the query itself
  no longer needs fn:doc().
:)
let $region := cts:circle(10, cts:point(59,28))
let $matching-uris :=
  cts:uris(
    (),
    (),
    cts:path-geospatial-query("/people_data/location", $region)
  )
(: Variable name in the map must match ?people in the FILTER below. :)
let $bindings := map:entry("people", $matching-uris)
return
  sem:sparql('
  SELECT *
  WHERE{
    ?person </posted> ?question .
    FILTER (?person = ?people) .
  }
', $bindings)

比上述示例更方便、优化效果也更好的做法,是使用Optic API重写它。Optic API正是为了以高性能的方式弥合各种数据模型之间的差距而设计的。

根据上面的代码推断,我认为用Optic API写出来大致如下:

import module namespace op="http://marklogic.com/optic" at "/MarkLogic/optic.xqy";

(:
  Single Optic pipeline: start from the URI lexicon (column "people",
  qualifier "lexicon"), keep only URIs matching the path geospatial query,
  inner-join them with the ?person / ?question rows from SPARQL
  (qualifier "sparql") on lexicon.people = sparql.person, and execute the plan.
:)
op:from-lexicons(map:entry("people", cts:uri-reference()), "lexicon")
  => op:where(
       cts:path-geospatial-query(
         "/people_data/location",
         cts:circle(10, cts:point(59,28))
       )
     )
  => op:join-inner(
       op:from-sparql('SELECT * WHERE { ?person </posted> ?question. }', "sparql"),
       op:on(op:view-col("lexicon", "people"), op:view-col("sparql", "person"))
     )
  => op:result()

在没有适当的数据和索引的情况下进行测试有点困难,但是我希望它足以使您入门。

您可以在此处找到介绍性文档:

https://docs.marklogic.com/guide/app-dev/OpticAPI

可以在这里找到API参考:

https://docs.marklogic.com/op

HTH!