在R中解析Ordnance Survey SPARQL GET请求的结果

时间:2016-10-19 13:27:06

标签: r xml geometry sparql geospatial

我对这个主题还比较陌生,所以请多包涵;

我正在从R访问Ordnance Survey SPARQL Endpoint来获取他们的RDF数据。我在解析返回的GML几何属性时遇到了问题。

我的SPARQL查询(例如)用一些属性(名称,代码和URI)返回爱丁堡南选举选区的几何;

require(SPARQL)
require(XML)

# Ordnance Survey linked-data SPARQL endpoint.
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Query text as a single multi-line string literal (the embedded newlines are
# part of the value). It selects the name, GSS code, URI and GML geometry of
# Westminster constituencies and keeps the one labelled "Edinburgh South".
query <- "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?WestminsterConstituencyName ?gssCode ?uri ?g
WHERE 
{ 
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName=\"Edinburgh South\")"

在R中,我想提取这些结果并用leaflet绘制成地图。我目前有两种方法可以从上面的查询中获得结果;

  1. 通过httr包使用GET请求并生成XML数据;

    # Packages used by the rest of this walkthrough. library() stops with an
    # error when a package is missing, whereas require() only returns FALSE and
    # lets the script fail later with a less helpful message.
    packs <- c("sp","stringr","rgdal","leaflet","gsubfn","XML","SPARQL","plyr","RColorBrewer","utils","httr")
    invisible(lapply(packs, library, character.only = TRUE))

    # format the request properly to return XML: the query text is
    # percent-encoded (reserved characters included) and output=xml is appended
    request <- paste0("http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql?query=",URLencode(query, reserved = TRUE),"&output=xml")

    # GET request from the endpoint; stop on an HTTP error status instead of
    # handing an error page to the XML parser
    data <- GET(request)
    stop_for_status(data)

    # decode the body as UTF-8 explicitly (otherwise httr has to guess the
    # encoding) and parse the text to XML
    data.xml <- xmlParse(content(data, as = "text", encoding = "UTF-8"), asText = TRUE)
    
  2. 使用SPARQL包返回数据帧;

    # send the query to the endpoint through the SPARQL package and keep only
    # the "results" element of the list it returns
    qd <- SPARQL(url = endpoint, query = query)[["results"]]
    
  3. 使用任一方法时,返回的几何体(无论是XML变量还是数据帧列)都包含所有GML标记:

    "<gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LONG LIST OF COORDINATE PAIRS</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>
    

    当使用方法2时,我可以执行以下解决方法来创建一个多边形,但它看起来非常难看;

    # BNG proj4 string
    BNG <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

    # attribute columns to attach to the SpatialPolygonsDataFrame later;
    # drop = FALSE keeps a data frame even when a single column is left
    data <- qd[, colnames(qd) != "g", drop = FALSE]

    # geometry part of the results: one GML string per result row
    geom <- as.character(qd[, "g"])

    # Convert the text of one <gml:coordinates> element ("x,y x,y ...") into a
    # two-column numeric matrix.
    parse_ring <- function(ring_text) {
      pairs <- strsplit(trimws(ring_text), "[[:space:]]+")[[1]]
      xy <- do.call(rbind, strsplit(pairs, ",", fixed = TRUE))
      if (is.null(xy) || ncol(xy) < 2) {
        stop("malformed <gml:coordinates> content", call. = FALSE)
      }
      ring <- matrix(as.numeric(xy[, 1:2, drop = FALSE]), ncol = 2,
                     dimnames = list(NULL, c("x", "y")))
      if (anyNA(ring)) {
        stop("non-numeric value in <gml:coordinates> content", call. = FALSE)
      }
      ring
    }

    # Build one Polygons object from one GML string, keeping every boundary
    # ring: rings inside <gml:innerBoundaryIs> are flagged as holes, and the
    # rings of every polygon member end up in the same Polygons object, so
    # neither holes nor multi-part geometries are dropped.
    gml_to_polygons <- function(gml, id) {
      boundary_pattern <-
        "(?s)<gml:(outer|inner)BoundaryIs>.*?</gml:\\1BoundaryIs>"
      boundaries <- regmatches(gml, gregexpr(boundary_pattern, gml, perl = TRUE))[[1]]
      if (length(boundaries) == 0) {
        stop("no GML boundary ring found for result row ", id, call. = FALSE)
      }
      rings <- lapply(boundaries, function(boundary) {
        ring_text <- sub("(?s).*<gml:coordinates>(.*?)</gml:coordinates>.*",
                         "\\1", boundary, perl = TRUE)
        Polygon(parse_ring(ring_text),
                hole = grepl("^<gml:innerBoundaryIs>", boundary))
      })
      Polygons(rings, id)
    }

    # create Polygon(s): one Polygons object per result row, with the row name
    # as ID so that SpatialPolygonsDataFrame can match geometry to attributes
    ids <- row.names(data)
    geom.list <- lapply(seq_along(geom), function(i) gml_to_polygons(geom[i], ids[i]))
    final <- SpatialPolygons(geom.list, proj4string = CRS(BNG))
    final.df <- SpatialPolygonsDataFrame(final, data)
    

    方法1返回这样的XML文件;

    <?xml version="1.0"?>
    <sparql xmlns="http://www.w3.org/2005/sparql-results#">
      <head>
        <variable name="WestminsterConstituencyName"/>
        <variable name="gssCode"/>
        <variable name="uri"/>
        <variable name="g"/>
      </head>
      <results>
        <result>
          <binding name="WestminsterConstituencyName">
            <literal>Edinburgh South</literal>
          </binding>
          <binding name="gssCode">
            <literal>S14000024</literal>
          </binding>
          <binding name="uri">
            <uri>http://data.ordnancesurvey.co.uk/id/7000000000033932</uri>
          </binding>
          <binding name="g">
            <literal datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral">&lt;gml:Polygon&gt;&lt;gml:outerBoundaryIs&gt;&lt;gml:LinearRing&gt;&lt;gml:coordinates&gt;LOTS OF COORDINATE PAIRS HERE&lt;/gml:coordinates&gt;&lt;/gml:LinearRing&gt;&lt;/gml:outerBoundaryIs&gt;&lt;/gml:Polygon&gt;</literal>
          </binding>
        </result>
      </results>
    </sparql>
    

    但我不知道如何从XML结果制作多边形(可写入shapefile)或者甚至JSON。我更喜欢使用XML,因为我也想访问其他XML资源。

    此外,方法2(字符串拆分等)的处理真的可行吗?如果数据并不总是符合这种格式怎么办?多部分多边形的信息会丢失吗?(我认为会)。有没有更“合适”的方法?

    感谢您的帮助。

1 个答案:

答案 0 :(得分:1)

我不知道这是否符合您对(纯粹的?)XML的兴趣,但它确实绘制出了一张地图。我把WestminsterConstituencyName参数化了,因此您甚至可以将其转换为函数。

我对GML一无所知,但我决定按原样使用它,而不是把坐标解析成更通用的、类似数据框的结构。经过一些研究,我相信rgdal可以作为Ordnance Survey(OS)的GML数据与leaflet所需输入之间的桥梁。在SPARQL中用str()做字符串转换后,就不需要在R中去掉引号和字符串字面量的类型标注了。我猜想rgdal也许能够直接处理来自OS的GML多边形,但对我来说,把多边形包进一段定义了图层和要素的GML里更清晰。

合理的下一步是尝试使用readOGR的文本连接,因此您不需要保存然后打开文件。

这里是Shiny implementation。我没有尝试任何性能改进,如缓存。

require(SPARQL)
require(XML)
require(leaflet)

# requires some system libraries
# I followed this for ubuntu
# http://www.sarasafavi.com/installing-gdalogr-on-ubuntu.html
# might also need to do something like
# sudo apt-get install libgdal-dev libproj-dev
library(rgdal)

# Constituency to fetch; this is the one value to change to map another one.
WestminsterConstituency <- "Edinburgh South"

# Ordnance Survey linked-data SPARQL endpoint.
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Fixed part of the query, ending at the opening quote of the name filter.
# str(?g) makes the endpoint return the GML as a plain string in ?gstr.
query.head <- "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
  PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

  SELECT ?WestminsterConstituencyName ?gssCode ?uri (str(?g) as ?gstr)
  WHERE
  {
  ?uri a geog:WestminsterConstituency;
  skos:prefLabel ?WestminsterConstituencyName;
  geog:gssCode ?gssCode;
  geom:extent ?geom .
  ?geom geom:asGML ?g.
  }
  HAVING(?WestminsterConstituencyName='"

# Complete query: fixed head + constituency name + closing quote and bracket.
query <- paste0(query.head, WestminsterConstituency, "')")

# Run the query and keep only the "results" element of the returned list.
qd <- SPARQL(url = endpoint, query = query)[["results"]]

# Parse the GML text held in the gstr column into an internal XML document.
xmlres <- xmlTreeParse(qd[["gstr"]], asText = TRUE, useInternalNodes = TRUE)

# Minimal GML feature collection holding a single WestminsterConstituency
# feature. The empty ogr:geometryProperty element is filled in below with the
# geometry returned by the endpoint.
template.text <- '<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:featureMember>
<WestminsterConstituency fid="0">
<ogr:geometryProperty>

</ogr:geometryProperty>
</WestminsterConstituency>
</gml:featureMember>
</ogr:FeatureCollection>'

# some node adding ideas from
# http://stackoverflow.com/questions/35201830/r-insert-node-into-xml-tree-at-specific-location
template.xml <- xmlTreeParse(template.text,
                             useInternalNodes = TRUE,
                             asText = TRUE)
template.top <- xmlRoot(template.xml)

# Add the constituency name as a NAME child element of the feature.
name.node <- newXMLNode("NAME", WestminsterConstituency)
feature.nodes <- xmlElementsByTagName(el = template.top,
                                      name = "WestminsterConstituency",
                                      recursive = TRUE)
addChildren(feature.nodes[[1]], kids = list(name.node))

# Insert the parsed GML geometry into the empty geometryProperty element.
geometry.nodes <- xmlElementsByTagName(el = template.top,
                                       name = "geometryProperty",
                                       recursive = TRUE)
addChildren(geometry.nodes[[1]], kids = list(xmlres))

# Write the completed document to disk so that readOGR can open it.
gml.file <- 'expanded.gml'
saveXML(template.top, gml.file)

# Everything after this point needs WC, so stop with an explicit message when
# the file cannot be read instead of carrying on with a missing or
# "try-error" object.
if (!("GML" %in% ogrDrivers()$name)) {
  stop("this GDAL/OGR build has no GML driver, so ", gml.file,
       " cannot be read", call. = FALSE)
}
WC <- tryCatch(
  readOGR(dsn = gml.file, layer = "WestminsterConstituency"),
  error = function(e) {
    stop("could not read ", gml.file, ": ", conditionMessage(e),
         call. = FALSE)
  }
)
print(summary(WC))

# CRS definitions used below: EPSG:27700 for the source coordinates and
# EPSG:4326 for the reprojected copy.
# http://www.alex-singleton.com/R-Tutorial-Materials/7-converting-coordinates.pdf
ukgrid <- "+init=epsg:27700"
latlong <- "+init=epsg:4326"

# Declare the CRS of the layer that was read, then reproject a copy of it.
# http://gis.stackexchange.com/questions/123212/assign-crs-to-shapefile-in-r
source.crs <- CRS(ukgrid)
target.crs <- CRS(latlong)
proj4string(WC) <- source.crs
WC.LL <- spTransform(WC, CRSobj = target.crs)

# Build the map in one expression: empty widget, then the default tile layer,
# then the reprojected constituency polygon. Evaluating m displays it.
m <- addPolygons(map = addTiles(leaflet()), data = WC.LL)
m