我对这个主题还比较陌生,所以请多多包涵;
我正在从R访问Ordnance Survey SPARQL Endpoint来获取他们的RDF数据。我在解析返回的GML几何属性时遇到了问题。
我的SPARQL查询(例如)用一些属性(名称,代码和URI)返回爱丁堡南选举选区的几何;
require(SPARQL)
require(XML)
# SPARQL endpoint of the Ordnance Survey linked-data service.
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Query text: for every Westminster constituency select its preferred label,
# GSS code, URI and the GML serialisation of its extent, then keep only the
# row whose label is "Edinburgh South".
query <-
"PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri ?g
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName=\"Edinburgh South\")"
在R中,我想提取这些结果并用leaflet把它们绘制成地图。我目前有两种方法可以获取上述查询的结果;
通过httr包使用GET请求并生成XML数据;
# Packages used by the snippets in this script.
packs <- c("sp","stringr","rgdal","leaflet","gsubfn","XML","SPARQL","plyr","RColorBrewer","utils","httr")
# Attach every package and stop immediately if one is not installed;
# require() would only return FALSE and let the script run on until a later,
# harder-to-read error.
invisible(lapply(packs, library, character.only = TRUE))
# format the request properly to return XML: the query text is fully
# percent-encoded and passed as the `query` URL parameter
request <- paste0("http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql?query=",URLencode(query, reserved = TRUE),"&output=xml")
# GET request from the endpoint
data <- GET(request)
# turn an HTTP error status into an R error instead of handing an error page
# to the XML parser
stop_for_status(data)
# parse the response body to XML; the encoding is stated explicitly so that
# content() does not have to guess it, and asText = TRUE makes it explicit
# that a string (not a file name) is being parsed
data.xml <- xmlParse(content(data, "text", encoding = "UTF-8"), asText = TRUE)
使用SPARQL包返回数据帧;
# Send the query to the endpoint and keep only the `results` element of what
# SPARQL() returns.
qd <- SPARQL(url = endpoint, query = query)[["results"]]
使用任一方法时,返回的几何体(无论是XML变量还是数据帧列)都包含所有GML标记:
"<gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LONG LIST OF COORDINATE PAIRS</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>
当使用方法2时,我可以执行以下解决方法来创建一个多边形,但它看起来非常难看;
# BNG (British National Grid) proj4 string
BNG <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

# Attribute columns (everything except the geometry) to attach to the
# SpatialPolygonsDataFrame later. drop = FALSE keeps a data frame even when
# only one attribute column remains.
data <- qd[, colnames(qd) != "g", drop = FALSE]
# Geometry part of the results: one GML string per result row.
geom <- as.character(qd[, "g"])

# Convert the text of one <gml:coordinates> element ("x,y x,y ...") into a
# two-column numeric matrix with columns x and y.
parse_ring <- function(txt) {
  pairs <- strsplit(trimws(txt), "[[:space:]]+")[[1]]
  if (length(pairs) == 0) {
    stop("empty <gml:coordinates> element", call. = FALSE)
  }
  xy <- do.call(rbind, strsplit(pairs, ",", fixed = TRUE))
  ring <- matrix(as.numeric(xy[, 1:2, drop = FALSE]), ncol = 2)
  colnames(ring) <- c("x", "y")
  ring
}

# Position of the last occurrence of `tag` in `txt` (-1 when it is absent).
last_tag_pos <- function(tag, txt) {
  max(gregexpr(tag, txt, fixed = TRUE)[[1]])
}

# Build one sp Polygons object from one GML string. Every <gml:coordinates>
# element becomes its own ring, so multi-ring geometries are not collapsed
# into a single ring; rings that sit inside <gml:innerBoundaryIs> are flagged
# as holes.
gml_to_polygons <- function(gml, id) {
  hits <- gregexpr("(?s)<gml:coordinates[^>]*>.*?</gml:coordinates>", gml,
                   perl = TRUE)[[1]]
  if (hits[1] == -1) {
    stop("no <gml:coordinates> element found in geometry ", id, call. = FALSE)
  }
  lens <- attr(hits, "match.length")
  rings <- lapply(seq_along(hits), function(i) {
    element <- substr(gml, hits[i], hits[i] + lens[i] - 1)
    coords.txt <- sub("(?s)^<gml:coordinates[^>]*>(.*)</gml:coordinates>$",
                      "\\1", element, perl = TRUE)
    # A ring is a hole when the boundary tag opened most recently before it
    # is an inner boundary rather than an outer one.
    before <- substr(gml, 1, hits[i] - 1)
    is.hole <- last_tag_pos("<gml:innerBoundaryIs>", before) >
      last_tag_pos("<gml:outerBoundaryIs>", before)
    Polygon(parse_ring(coords.txt), hole = is.hole)
  })
  Polygons(rings, ID = id)
}

# create Polygon(s): one Polygons object per result row, with IDs taken from
# the attribute table's row names so that the geometries and the attributes
# line up in SpatialPolygonsDataFrame()
ids <- row.names(data)
geom.list <- lapply(seq_along(geom), function(i) {
  gml_to_polygons(geom[[i]], ids[[i]])
})
final <- SpatialPolygons(geom.list, proj4string = CRS(BNG))
final.df <- SpatialPolygonsDataFrame(final, data)
方法1返回这样的XML文件;
<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
<head>
<variable name="WestminsterConstituencyName"/>
<variable name="gssCode"/>
<variable name="uri"/>
<variable name="g"/>
</head>
<results>
<result>
<binding name="WestminsterConstituencyName">
<literal>Edinburgh South</literal>
</binding>
<binding name="gssCode">
<literal>S14000024</literal>
</binding>
<binding name="uri">
<uri>http://data.ordnancesurvey.co.uk/id/7000000000033932</uri>
</binding>
<binding name="g">
<literal datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"><gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LOTS OF COORDINATE PAIRS HERE</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></literal>
</binding>
</result>
</results>
</sparql>
但我不知道如何从XML结果生成多边形(可写入shapefile),甚至是JSON。我更倾向于使用XML,因为我还想访问其他XML资源。
此外,方法2(字符串拆分等)的处理方式真的可行吗?如果数据并不总是符合这种格式怎么办?多部分多边形的信息会丢失吗?(我认为会)。有没有更“合适”的方法?
感谢您的帮助。
答案 0 :(得分:1)
我不知道这是否符合您对(原生的?)XML的偏好,但它确实能绘制出一张地图。我把WestminsterConstituencyName参数化了,因此您甚至可以把它改写成一个函数。
我对GML一无所知,但我决定按原样使用它,而不是把坐标解析成更通用的、类似数据框的结构。经过一些研究,我认为rgdal可以充当Ordnance Survey(OS)的GML数据与leaflet所需输入之间的桥梁。在SPARQL中先做字符串转换(str()),就不必再在R中去除引号和字符串字面量的类型后缀。我猜rgdal也许能够直接处理来自OS的GML多边形,但把多边形包裹进一段定义了图层和要素的GML中,对我来说更清晰。
合理的下一步是尝试使用readOGR的文本连接,因此您不需要保存然后打开文件。
这里是Shiny implementation。我没有尝试任何性能改进,如缓存。
require(SPARQL)
require(XML)
require(leaflet)
# requires some system libraries
# I followed this for ubuntu
# http://www.sarasafavi.com/installing-gdalogr-on-ubuntu.html
# might also need to do something like
# sudo apt-get install libgdal-dev libproj-dev
library(rgdal)
# Constituency to look up. Change this value (or wrap the script in a
# function taking it as an argument) to map a different constituency.
WestminsterConstituency <- "Edinburgh South"
endpoint <-
  "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"
# The name is pasted into a single-quoted SPARQL string literal below, so
# backslashes and single quotes in it are backslash-escaped first; otherwise
# a name containing an apostrophe would end the literal early and break (or
# alter) the query.
constituency.literal <-
  gsub("(['\\\\])", "\\\\\\1", WestminsterConstituency, perl = TRUE)
# Same selection as a plain query for the constituency's label, GSS code and
# URI, except that the GML geometry is converted with str() and returned as
# ?gstr.
query <- paste0(
"PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri (str(?g) as ?gstr)
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName='",
  constituency.literal,
  "')"
)
qd <- SPARQL(endpoint, query)$results
# Stop here with a readable message when nothing matched, rather than failing
# later while parsing a missing geometry string.
if (NROW(qd) == 0) {
  stop("No Westminster constituency named '", WestminsterConstituency,
       "' was returned by the endpoint.", call. = FALSE)
}
# Parse the GML geometry string returned by the query into an internal XML
# document.
xmlres <- xmlTreeParse(qd$gstr, useInternalNodes = TRUE, asText = TRUE)

# Skeleton GML feature collection: a single WestminsterConstituency feature
# with an empty geometryProperty element that is filled in below.
template.text <- '<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:featureMember>
<WestminsterConstituency fid="0">
<ogr:geometryProperty>
</ogr:geometryProperty>
</WestminsterConstituency>
</gml:featureMember>
</ogr:FeatureCollection>'

# some node adding ideas from
# http://stackoverflow.com/questions/35201830/r-insert-node-into-xml-tree-at-specific-location
template.xml <- xmlTreeParse(template.text, asText = TRUE, useInternalNodes = TRUE)
template.top <- xmlRoot(template.xml)

# Add the constituency name as a <NAME> child of the feature element.
name.node <- newXMLNode("NAME", WestminsterConstituency)
template.node <- xmlElementsByTagName(template.top, "WestminsterConstituency", recursive = TRUE)
addChildren(template.node[[1]], kids = list(name.node))

# Put the parsed geometry inside the (so far empty) geometryProperty element.
template.node <- xmlElementsByTagName(template.top, "geometryProperty", recursive = TRUE)
addChildren(template.node[[1]], kids = list(xmlres))

# Write the completed document to disk so that it can be read back by file
# name.
saveXML(template.top, "expanded.gml")
# Read the generated GML file back in as a Spatial object via GDAL/OGR.
# Both failure modes stop the script with an explicit message: previously a
# missing GML driver left `WC` undefined, and a failed read left a
# "try-error" object in `WC`, so the script only broke later at the
# projection step with an unrelated-looking error.
if (!("GML" %in% ogrDrivers()$name)) {
  stop("The GDAL/OGR build used by rgdal has no GML driver; ",
       "cannot read 'expanded.gml'.", call. = FALSE)
}
WC <- tryCatch(
  readOGR(dsn = "expanded.gml", layer = "WestminsterConstituency"),
  error = function(e) {
    stop("Could not read layer 'WestminsterConstituency' from ",
         "'expanded.gml': ", conditionMessage(e), call. = FALSE)
  }
)
# Show what was read (layer class, bounding box, attributes).
print(summary(WC))
# Coordinate reference systems, given as EPSG init strings: 27700 for the
# source data (named ukgrid here) and 4326 for the longitude/latitude the map
# is drawn in.
# http://www.alex-singleton.com/R-Tutorial-Materials/7-converting-coordinates.pdf
ukgrid <- "+init=epsg:27700"
latlong <- "+init=epsg:4326"

# The layer read from the GML file is assigned the UK grid CRS here before
# being reprojected.
# http://gis.stackexchange.com/questions/123212/assign-crs-to-shapefile-in-r
proj4string(WC) <- CRS(ukgrid)
WC.LL <- spTransform(WC, CRS(latlong))

# Base map tiles with the reprojected constituency polygon on top; evaluating
# `m` at top level displays the map.
m <- addPolygons(addTiles(leaflet()), data = WC.LL)
m