gIntersects()
包中用于测试点是否位于多边形中的函数rgeos
非常慢。有没有一种加速计算的好方法?
答案 0 :(得分:2)
我尝试了评论中已经提到的sp::point.in.polygon
,sp::over
和rgeos::gIntersects()
之间的速度比较。请注意point.in.poly
中还有一个{spatialEco}
函数,但它似乎只是sp::over
的包装。
我意识到sp::point.in.polygon
不能很好地处理多部分多边形(也指向here)并且需要提供原始坐标(所以我认为对于多部分多边形需要用于循环)。请注意,sp::point.in.polygon
仅在方形多边形的情况下它比其他所有其他更快,这使我认为只有更简单的形状才会更快。总而言之,无论何时遇到速度问题,juts都会尝试测试您的具体情况。对于我具体选择的例子,sp::over
似乎是一个更好的选择,但我不会概括。希望我的例子没问题,否则请随意纠正我。
由于没有提供数据,我在下面使用了一些例子。
library(rgeos)
library(sp)
library(microbenchmark)
library(ggplot2)
library(maps)
library(maptools)
library(raster)
# Get world map data
# (conversion code from "Applied Spatial Data Analysis with R")
worldmap <- maps::map("world", fill=TRUE, plot=FALSE)
# transform to SpatialPolygons
worldmapPolys <- maptools::map2SpatialPolygons(worldmap,
IDs=sapply(strsplit(worldmap$names, ":"), "[", 1L),
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Generate random points for entire world
set.seed(2017)
pts <- sp::spsample(worldmapPolys, n=10^5, type="random")
# Define functions to test for speed
gIntersects_tst <- function(my.pts, my.poly){
rgeos::gIntersects(spgeom1 = my.pts,
spgeom2 = my.poly,
byid = TRUE)
}
over_tst <- function(my.pts, my.poly){
sp::over(x = my.pts, y = my.poly)
}
point.in.polygon_tst <- function(my.pts, my.poly){
# get coordinates from polygon
XY <- raster::geom(my.poly)
sp::point.in.polygon(point.x = my.pts@coords[,1],
point.y = my.pts@coords[,2],
pol.x = XY[,5],
pol.y = XY[,6],
mode.checked = TRUE)
}
# Micro-benchmarking
# The idea is to test which points fall into a selected polygon (country)
res <- microbenchmark(TF1 <- gIntersects_tst(pts, worldmapPolys[183,]),
TF2 <- gIntersects_tst(worldmapPolys[183,], pts),
idx <- over_tst(pts, worldmapPolys[183,]),
codes <- point.in.polygon_tst(pts, worldmapPolys[183,]))
print(res)
## Unit: milliseconds
## expr min
## TF1 <- gIntersects_tst(pts, worldmapPolys[183, ]) 142.61992
## TF2 <- gIntersects_tst(worldmapPolys[183, ], pts) 125.99551
## idx <- over_tst(pts, worldmapPolys[183, ]) 50.72425
## codes <- point.in.polygon_tst(pts, worldmapPolys[183, ]) 224.57961
## lq mean median uq max neval cld
## 153.46915 174.42346 162.90885 177.69223 338.2691 100 b
## 136.13762 158.88218 144.89180 156.91664 352.3276 100 b
## 55.50899 69.67542 63.80366 78.12026 132.8704 100 a
## 243.12288 276.71458 257.38068 275.46144 589.9082 100 c
ggplot2::autoplot(res) + ggtitle("single-polygon: 100 evaluations")
请注意,对于gIntersects()
,参数的顺序似乎很重要。结果的速度和结构存在差异。
identical(TF1,TF2)
## [1] FALSE
identical(TF1[,1:length(pts)], TF2[1:length(pts),])
## [1] TRUE
class(TF1); str(TF1)
## [1] "matrix"
## logi [1, 1:100000] FALSE FALSE FALSE FALSE FALSE FALSE ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr "Romania"
## ..$ : chr [1:100000] "1" "2" "3" "4" ...
class(TF2); str(TF2)
## [1] "matrix"
## logi [1:100000, 1] FALSE FALSE FALSE FALSE FALSE FALSE ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:100000] "1" "2" "3" "4" ...
## ..$ : chr "Romania"
# Subset world points
pts.gI1 <- pts[TF1,]
pts.gI2 <- pts[TF2,]
pts.ovr <- pts[!is.na(idx),]
pts.PiP <- pts[as.logical(codes),]
# All subsets are identical
identical(pts.gI1, pts.gI2)
## [1] TRUE
identical(pts.gI2, pts.ovr)
## [1] TRUE
identical(pts.ovr, pts.PiP)
## [1] TRUE
# Generate two square polygons
grd <- sp::GridTopology(c(1,1), c(1,1), c(2,1))
polys <- sp::as.SpatialPolygons.GridTopology(grd)
# Generate some random points
set.seed(2017)
pts2 <- sp::spsample(polys, n=10^5, type="random")
# Micro-benchmarking
# Test only for those points falling in first square
res <- microbenchmark(TF1 <- gIntersects_tst(pts2, polys[1,]),
TF2 <- gIntersects_tst(polys[1,], pts2),
idx <- over_tst(pts2, polys[1,]),
codes <- point.in.polygon_tst(pts2, polys[1,]))
print(res)
## Unit: milliseconds
## expr min lq
## TF1 <- gIntersects_tst(pts2, polys[1, ]) 151.35336 165.23526
## TF2 <- gIntersects_tst(polys[1, ], pts2) 123.26241 135.90883
## idx <- over_tst(pts2, polys[1, ]) 54.84891 63.89454
## codes <- point.in.polygon_tst(pts2, polys[1, ]) 9.39330 10.66513
## mean median uq max neval cld
## 189.67848 177.62808 190.89566 365.92728 100 d
## 157.47151 148.50073 160.37567 314.02700 100 c
## 76.42608 70.66998 79.81225 240.55570 100 b
## 14.09199 11.37738 16.88741 46.19245 100 a
ggplot2::autoplot(res) + ggtitle("square polygon: 100 evaluations")
pts2.gI1 <- pts2[TF1,]
pts2.gI2 <- pts2[TF2,]
pts2.ovr <- pts2[!is.na(idx),]
pts2.PiP <- pts2[as.logical(codes),]
# All subsets are identical
identical(pts2.gI1, pts2.gI2)
## [1] TRUE
identical(pts2.gI2, pts2.ovr)
## [1] TRUE
identical(pts2.ovr, pts2.PiP)
## [1] TRUE
会话信息
sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] raster_2.5-8 mapview_1.2.0 leaflet_1.1.0 maptools_0.9-2 maps_3.1.1
[6] ggplot2_2.2.1 microbenchmark_1.4-2.1 sp_1.2-4 rgeos_0.3-23