在下面的reprex中,我对某些点和多边形数据运行了空间连接,但是使用sp
包和使用sf
包时意外地得到了不同的结果。为什么会这样?
我试图计算acled
网格正方形内的prio
个点,但是如下所示,即使从st_covers
运行sf
联接,包之间的计数也有所不同,据我所知应该在功能上与使用over
中的sp
方法相同。
library(sp) # packageVersion("sp") #> [1] ‘1.2.7’
library(sf) # packageVersion("sf") #> [1] ‘0.6.3’
library(rgdal)
library(maptools)
library(dplyr); library(tibble)
以下是我正在使用的示例数据:
# prio (polygon squares) and acled (points); in both sp and sf objects:
# prio sf polygons object
priosf <- structure(list(
CELL_ID = c(180365, 176783, 150830, 145866, 140055),
gwno = c(615L, 616L, 432L, 626L, 475L),
POP = c(111983.7, 107369.7, 12169.35, 23005.76, 527012.1),
prio_country = c("Algeria", "Tunisia", "Mali", "South Sudan", "Nigeria"),
geometry = structure(list(structure(list(structure(c(2, 2, 2.5, 2.5, 2, 35, 35.5, 35.5, 35, 35),
.Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(11, 11, 11.5, 11.5, 11, 32.5, 33, 33, 32.5, 32.5),
.Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(-5.5, -5.5, -5, -5, -5.5, 14.5, 15, 15, 14.5, 14.5),
.Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(32.5, 32.5, 33, 33, 32.5, 11, 11.5, 11.5, 11, 11),
.Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(7, 7, 7.5, 7.5, 7, 7, 7.5, 7.5, 7, 7),
.Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg"))),
class = c("sfc_POLYGON", "sfc"), precision = 0,
bbox = structure(c(-5.5, 7, 33, 35.5),
.Names = c("xmin", "ymin", "xmax", "ymax"),
class = "bbox"),
crs = structure(list(epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"),
.Names = c("epsg", "proj4string"), class = "crs"), n_empty = 0L)),
.Names = c("CELL_ID", "gwno", "POP", "prio_country", "geometry"),
row.names = c(NA, -5L), class = c("sf", "tbl_df", "tbl", "data.frame"),
sf_column = "geometry", agr = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_),
class = "factor", .Label = c("constant", "aggregate", "identity"),
.Names = c("CELL_ID", "gwno", "POP", "prio_country")))
# prio sp polygons object
priosp <- as(priosf, 'Spatial')
# acled data
acled <- structure(list(
EVENT_ID_CNTY = c("ALG3195", "ALG3316", "ALG4228",
"ALG4824", "MLI1050", "MLI1144", "MLI1423", "MLI1672", "NIG4606",
"NIG4951", "NIG6196", "NIG7661", "NIG9100", "SSD1216", "SSD1504",
"SSD3232", "SSD3234", "SSD3231", "SSD3239", "TUN1376", "TUN2597",
"TUN3217", "TUN3633"),
COUNTRY = c("Algeria", "Algeria", "Algeria",
"Algeria", "Mali", "Mali", "Mali", "Mali", "Nigeria", "Nigeria",
"Nigeria", "Nigeria", "Nigeria", "South Sudan", "South Sudan",
"South Sudan", "South Sudan", "South Sudan", "South Sudan", "Tunisia",
"Tunisia", "Tunisia", "Tunisia"),
LATITUDE = c(35.2122, 35.4343, 35.2122, 35.2122, 14.8252, 14.8252, 14.7414, 14.8252, 7.3028,
7.3028, 7.3028, 7.3028, 7.3588, 11.05, 11.05, 11.05, 11.05, 11.05, 11.05, 32.8487, 32.7149, 32.7149, 32.7149),
LONGITUDE = c(2.3189, 2.2166, 2.3189, 2.3189, -5.2547, -5.2547, -5.3282, -5.2547, 7.0382, 7.0382, 7.0382, 7.0382, 7.0994, 32.7, 32.7, 32.7, 32.7, 32.7, 32.7, 11.4309, 11.012, 11.012, 11.012)),
row.names = c(NA, -23L),
class = c("tbl_df", "tbl", "data.frame"),
.Names = c("EVENT_ID_CNTY", "COUNTRY", "LATITUDE", "LONGITUDE"))
# acled sf points object
acledsf <- st_as_sf(
acled,
coords = c('LATITUDE', 'LONGITUDE'),
crs = 4326
)
# acled sp points object
coordinates(acled) <- ~LONGITUDE+LATITUDE
proj4string(acled) <- proj4string(priosp)
acledsp <- acled; rm(acled)
sp
打包空间连接结果。我绑定了与每个点相交的多边形,将结果连接到这些点,然后计算了CELL_ID(多边形)的数量:
# sp spatial join:
addPolyDataToPts <- function (points, poly) {
polysByPoint <- over(points, poly)
points <- spCbind(points, polysByPoint)
}
acj <- addPolyDataToPts(acledsp, priosp)
(acled_count_sp <- acj@data %>% filter(!is.na(CELL_ID)) %>%
group_by(CELL_ID, prio_country, POP) %>%
summarize(acled_sp = n()) %>% arrange(CELL_ID) %>%
rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups: CELL_ID, prio_country_sp [5]
#> CELL_ID prio_country_sp POP acled_sp
#> <dbl> <chr> <dbl> <int>
#> 1 140055. Nigeria 527012. 5
#> 2 145866. South Sudan 23006. 6
#> 3 150830. Mali 12169. 4
#> 4 176783. Tunisia 107370. 4
#> 5 180365. Algeria 111984. 4
模拟sf
打包空间连接结果,其中除一个多边形正方形外,我的计数列acled_sf
与上面的acled_sp
列不同。 (140055;尼日利亚):
# sf spatial join:
(acled_count_sf <-
st_join(priosf, acledsf, join = st_covers) %>%
group_by(CELL_ID, POP, prio_country) %>%
summarize(acled_sf = n()) %>% ungroup %>%
arrange(CELL_ID) %>%
rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type: POLYGON
#> dimension: XY
#> bbox: xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#> CELL_ID POP prio_country_sf acled_sf geometry
#> <dbl> <dbl> <chr> <int> <POLYGON [°]>
#> 1 140055. 527012. Nigeria 5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2 145866. 23006. South Sudan 4 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3 150830. 12169. Mali 1 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4 176783. 107370. Tunisia 6 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5 180365. 111984. Algeria 1 ((2 35, 2 35.5, 2.5 35.5, 2.5 …
我的运行理论是,一种方法以错误的顺序绑定值,但是我不确定是哪种方法。在我的较大示例中,我得到了相似的值,但绑定到了不同的多边形,即对于sf
连接,'2706'点与单元1匹配,对于sp
连接,与单元2匹配。
(而且,在某些情况下,sf
联接中会丢失一些值)
任何对我的结果如何或为什么以这种方式有所不同的见解都将受到赞赏。
答案 0 :(得分:5)
因此,我花了点时间在mapview中绘制数据以弄清楚这里发生了什么,但是至少在给定的reprex中,您的问题是由于创建acledsf
时向后指定了经度和纬度宾语。以正确的顺序创建,并且联接输出匹配:
# acled sf points object
acledsf <- st_as_sf(
acled,
coords = c('LONGITUDE', 'LATITUDE'), ###notice the correct order here
crs = 4326
)
# acled sp points object
coordinates(acled) <- c("LONGITUDE", "LATITUDE")
proj4string(acled) <- proj4string(priosp)
acledsp <- acled; rm(acled)
addPolyDataToPts <- function (points, poly) {
polysByPoint <- over(points, poly)
points <- spCbind(points, polysByPoint)
}
acj <- addPolyDataToPts(acledsp, priosp)
(acled_count_sp <- acj@data %>% filter(!is.na(CELL_ID)) %>%
group_by(CELL_ID, prio_country, POP) %>%
summarize(acled_sp = n()) %>% arrange(CELL_ID) %>%
rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups: CELL_ID, prio_country_sp [5]
#> CELL_ID prio_country_sp POP acled_sp
#> <dbl> <chr> <dbl> <int>
#> 1 140055 Nigeria 527012. 5
#> 2 145866 South Sudan 23006. 6
#> 3 150830 Mali 12169. 4
#> 4 176783 Tunisia 107370. 4
#> 5 180365 Algeria 111984. 4
### sf
(acled_count_sf <-
st_join(priosf, acledsf, join = st_covers) %>%
group_by(CELL_ID, prio_country, POP) %>%
summarize(acled_sf = n()) %>% ungroup %>%
arrange(CELL_ID) %>%
rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type: POLYGON
#> dimension: XY
#> bbox: xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#> CELL_ID prio_country_sf POP acled_sf geometry
#> <dbl> <chr> <dbl> <int> <POLYGON [°]>
#> 1 140055 Nigeria 527012. 5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2 145866 South Sudan 23006. 6 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3 150830 Mali 12169. 4 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4 176783 Tunisia 107370. 4 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5 180365 Algeria 111984. 4 ((2 35, 2 35.5, 2.5 35.5, 2.5 …