SPARQL - 超过56000个三元组的文件 - 如何一次检查多个条件

时间:2018-05-28 19:28:19

标签: sparql rdf

我是SPARQL的新手。我有一个来自Project Gutenberg的(对我来说)巨大的RDF / XML文件,我试图从中找到具有某些特定特征的书籍:这些书应该属于“小说(fiction)”类别,并且用英文写成。

我想使用SPARQL来创建此过滤器。我知道例如如何进行简单的查询(例如检索书的标题)但是当我必须创建复杂的查询时,我迷路了。

查询完成、确认某本书属于小说之后,我想检索以下信息:作者、语言、标题和ID。

这是RDF / XML文件的摘录。

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xml:base="http://www.gutenberg.org/"
  xmlns:cc="http://web.resource.org/cc/"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:dcam="http://purl.org/dc/dcam/"
  xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
>
  <cc:Work rdf:about="">
    <rdfs:comment>Archives containing the RDF files for *all* our books can be downloaded at
            http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog</rdfs:comment>
    <cc:license rdf:resource="https://creativecommons.org/publicdomain/zero/1.0/"/>
  </cc:Work>

  <pgterms:ebook rdf:about="ebooks/123">
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/123/123-h/123-h.htm">
        <dcterms:format>
          <rdf:Description rdf:nodeID="N2c0860e4a086416ca54377ad8d2feb3a">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/html; charset=iso-8859-1</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">309310</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-07-19T10:37:04</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
      </pgterms:file>
    </dcterms:hasFormat>

    <!-- One of several dcterms:subject entries on this ebook. The subject is a
         blank node: the heading text is the rdf:value literal, and
         dcam:memberOf names the vocabulary it belongs to (LCSH here). -->
    <dcterms:subject>
      <rdf:Description rdf:nodeID="Na447b67898ca445eb5df7fe22decdcc8">
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
        <rdf:value>Science fiction</rdf:value>
      </rdf:Description>
    </dcterms:subject>
    <dcterms:type>
      <rdf:Description rdf:nodeID="Nc3bf0165309d49cc9ff92b4f34bf3d0b">
        <rdf:value>Text</rdf:value>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
      </rdf:Description>
    </dcterms:type>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.txt.utf-8">
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-10-01T01:21:10.697628</dcterms:modified>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">294801</dcterms:extent>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N69057b2da52d4b848e6b7ab307920347">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <!-- Language of the ebook. The code is not attached directly to
         dcterms:language; it is the rdf:value literal of a blank node,
         typed with the dcterms RFC4646 datatype ("en" here). -->
    <dcterms:language>
      <rdf:Description rdf:nodeID="N3e77dee5bbbb413089732da352a8a605">
        <rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">en</rdf:value>
      </rdf:Description>
    </dcterms:language>
    <dcterms:rights>Public domain in the USA.</dcterms:rights>
    <dcterms:title>At the Earth's Core</dcterms:title>
    <dcterms:description>Pellucidar</dcterms:description>
    <dcterms:subject>
      <rdf:Description rdf:nodeID="N0748f419c34e4f75abdf52404dcbd879">
        <rdf:value>Adventure stories</rdf:value>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
      </rdf:Description>
    </dcterms:subject>
    <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">1994-04-01</dcterms:issued>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/123/123.txt">
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">294829</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-07-19T10:36:58</dcterms:modified>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Ndcd1e1ce476947bc8c2144c400182de4">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:subject>
      <rdf:Description rdf:nodeID="N880067e98e604133b01bd7b752a86b07">
        <rdf:value>Earth (Planet) -- Core -- Fiction</rdf:value>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
      </rdf:Description>
    </dcterms:subject>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.epub.noimages">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">120042</dcterms:extent>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N3eba62c48ab24374b1e3acc96aae9db3">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-10-01T01:21:11.287607</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.kindle.images">
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-10-01T01:21:12.665594</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N32c49cff0f9947b697e4690f5e80fb45">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/x-mobipocket-ebook</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">501780</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <!-- Creator of the ebook, given as a pgterms:agent resource rather than a
         plain literal; the person's name is the pgterms:name literal. -->
    <dcterms:creator>
      <pgterms:agent rdf:about="2009/agents/48">
        <pgterms:name>Burroughs, Edgar Rice</pgterms:name>
        <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1950</pgterms:deathdate>
        <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Edgar_Rice_Burroughs"/>
        <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1875</pgterms:birthdate>
      </pgterms:agent>
    </dcterms:creator>
    <dcterms:publisher>Project Gutenberg</dcterms:publisher>
    <dcterms:license rdf:resource="license"/>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.rdf">
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nc0303d16343845c698d57f6fff44420f">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/rdf+xml</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2018-05-17T05:00:17.585186</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">12268</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:subject>
      <rdf:Description rdf:nodeID="N483a91a4b85a4618be4ea92726b976a0">
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCC"/>
        <rdf:value>PS</rdf:value>
      </rdf:Description>
    </dcterms:subject>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.epub.images">
        <dcterms:format>
          <rdf:Description rdf:nodeID="N11355a7445a8454d8fb33ecbd752949d">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">120040</dcterms:extent>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-10-01T01:21:11.093621</dcterms:modified>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:subject>
      <rdf:Description rdf:nodeID="N6bf1581faa564781b44875beb4afd885">
        <rdf:value>Fantasy fiction</rdf:value>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
      </rdf:Description>
    </dcterms:subject>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/123/123.zip">
        <dcterms:format>
          <rdf:Description rdf:nodeID="N9dc3758187eb4063bcb6b728000aa222">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N43cf23a45ac1484a88bcad1e6af81564">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=us-ascii</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-07-19T10:37:28</dcterms:modified>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">112146</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/123/123-h.zip">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">114129</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-07-19T10:37:28</dcterms:modified>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nbe9881c4945a49eca8219a3cecdc70bd">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nc92f3d9ab6164c7eb7f56fa64a78d2ef">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/html; charset=iso-8859-1</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/123.kindle.noimages">
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-10-01T01:21:14.044548</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/123"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">501774</dcterms:extent>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N0878de4bb93e4612b7088ba1d16d9509">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/x-mobipocket-ebook</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <pgterms:bookshelf>
      <rdf:Description rdf:nodeID="N1ba815d6c47845bf8e6e3b96920d40dc">
        <rdf:value>Science Fiction</rdf:value>
        <dcam:memberOf rdf:resource="2009/pgterms/Bookshelf"/>
      </rdf:Description>
    </pgterms:bookshelf>
    <pgterms:downloads rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">178</pgterms:downloads>
  </pgterms:ebook>


  <rdf:Description rdf:about="http://en.wikipedia.org/wiki/Edgar_Rice_Burroughs">
    <dcterms:description>Wikipedia</dcterms:description>
  </rdf:Description>
</rdf:RDF>

1 个答案:

答案 0 :(得分:2)

要获取dcterms:subject的值中包含“fiction”一词的所有图书,您可以执行以下操作:

# Select every resource that has at least one dcterms:subject whose rdf:value
# contains "fiction" (case-insensitive, via the "i" flag of REGEX).
# The PREFIX declarations are required: without them the prefixed names
# dcterms:subject and rdf:value cannot be resolved and the query is rejected.
PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT DISTINCT ?book
WHERE {
  # The subject is a blank node; its text lives in rdf:value.
  ?book dcterms:subject [ rdf:value ?g ] .
  # STR() turns ?g into a plain string before matching.
  FILTER(REGEX(STR(?g), "fiction", "i"))
}

此查询会抓取所有具有dcterms:subject值的资源,而该值又带有一个值为?g的rdf:value属性。然后,FILTER条件会检查?g的值是否包含“fiction”一词(不区分大小写)。

要获取所返回图书的其他属性,只需扩展查询即可。例如,要同时取回书名,可以这样做:

# Select every resource, together with its dcterms:title, that has at least one
# dcterms:subject whose rdf:value contains "fiction" (case-insensitive).
# The PREFIX declarations are required: without them the prefixed names
# dcterms:* and rdf:value cannot be resolved and the query is rejected.
PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT DISTINCT ?book ?title
WHERE {
  # ";" continues with the same subject (?book) for the next predicate.
  ?book dcterms:title ?title ;
        dcterms:subject [ rdf:value ?g ] .
  # STR() turns ?g into a plain string before matching.
  FILTER(REGEX(STR(?g), "fiction", "i"))
}

希望这能帮你入门;借助SPARQL教程,再加上一些努力,你应该能够自行扩展它。