为什么我的空间连接使用sp和sf包返回不同的结果?

时间:2018-06-27 04:11:09

标签: r spatial sp sf

在下面的reprex中,我对某些点和多边形数据运行了空间连接,但是使用sp包和使用sf包时意外地得到了不同的结果。为什么会这样?

我试图统计每个prio网格方块内的acled点数,但如下所示,即使使用st_covers运行sf连接,两个包得到的计数也不相同;据我所知,st_covers在功能上应该与sp中的over方法相同。

library(sp) # packageVersion("sp") #> [1] ‘1.2.7’
library(sf) # packageVersion("sf") #> [1] ‘0.6.3’
library(rgdal)
library(maptools)
library(dplyr); library(tibble)

以下是我正在使用的示例数据:

# prio (polygon squares) and acled (points); in both sp and sf objects:

# prio sf polygons object (dput() output of an sf tibble with five features).
# Each polygon is a single closed ring stored as a 5 x 2 matrix that is filled
# column-wise: the first five numbers are the x values (longitude), the last
# five are the y values (latitude), and the fifth vertex repeats the first.
# Every cell is a 0.5 x 0.5 degree square; the CRS is EPSG:4326.
priosf <- structure(list(
  CELL_ID = c(180365, 176783, 150830, 145866, 140055), 
  gwno = c(615L, 616L, 432L, 626L, 475L), 
  POP = c(111983.7, 107369.7, 12169.35, 23005.76, 527012.1), 
  prio_country = c("Algeria", "Tunisia", "Mali", "South Sudan", "Nigeria"), 
  geometry = structure(list(structure(list(structure(c(2, 2, 2.5, 2.5, 2, 35, 35.5, 35.5, 35, 35), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(11, 11, 11.5, 11.5, 11, 32.5, 33, 33, 32.5, 32.5), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(-5.5, -5.5, -5, -5, -5.5, 14.5, 15, 15, 14.5, 14.5), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(32.5, 32.5, 33, 33, 32.5, 11, 11.5, 11.5, 11, 11),
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(7, 7, 7.5, 7.5, 7, 7, 7.5, 7.5, 7, 7), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg"))), 
                       class = c("sfc_POLYGON", "sfc"), precision = 0, 
                       bbox = structure(c(-5.5, 7, 33, 35.5), 
                                        .Names = c("xmin", "ymin", "xmax", "ymax"), 
                                        class = "bbox"), 
                       crs = structure(list(epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), 
                                       .Names = c("epsg", "proj4string"), class = "crs"), n_empty = 0L)), 
              .Names = c("CELL_ID", "gwno", "POP", "prio_country", "geometry"),
  row.names = c(NA, -5L), class = c("sf", "tbl_df", "tbl", "data.frame"),
  sf_column = "geometry", agr = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_), 
                                          class = "factor", .Label = c("constant", "aggregate", "identity"), 
                                          .Names = c("CELL_ID", "gwno", "POP", "prio_country")))

# prio sp polygons object, converted from the sf object above so that both
# versions are built from the same geometries and attributes
priosp <- as(priosf, 'Spatial')

# acled data: 23 event records as a plain tibble (no geometry yet) with a
# character event id, a character country name, and numeric LATITUDE and
# LONGITUDE columns. Several events share identical coordinates.
acled <- structure(list(
  EVENT_ID_CNTY = c("ALG3195", "ALG3316", "ALG4228", 
      "ALG4824", "MLI1050", "MLI1144", "MLI1423", "MLI1672", "NIG4606", 
      "NIG4951", "NIG6196", "NIG7661", "NIG9100", "SSD1216", "SSD1504", 
      "SSD3232", "SSD3234", "SSD3231", "SSD3239", "TUN1376", "TUN2597", 
      "TUN3217", "TUN3633"), 
  COUNTRY = c("Algeria", "Algeria", "Algeria", 
              "Algeria", "Mali", "Mali", "Mali", "Mali", "Nigeria", "Nigeria", 
              "Nigeria", "Nigeria", "Nigeria", "South Sudan", "South Sudan", 
              "South Sudan", "South Sudan", "South Sudan", "South Sudan", "Tunisia", 
              "Tunisia", "Tunisia", "Tunisia"), 
  LATITUDE = c(35.2122, 35.4343, 35.2122, 35.2122, 14.8252, 14.8252, 14.7414, 14.8252, 7.3028, 
               7.3028, 7.3028, 7.3028, 7.3588, 11.05, 11.05, 11.05, 11.05, 11.05, 11.05, 32.8487, 32.7149, 32.7149, 32.7149), 
  LONGITUDE = c(2.3189, 2.2166, 2.3189, 2.3189, -5.2547, -5.2547, -5.3282, -5.2547, 7.0382, 7.0382, 7.0382, 7.0382, 7.0994, 32.7, 32.7, 32.7, 32.7, 32.7, 32.7, 11.4309, 11.012, 11.012, 11.012)), 
  row.names = c(NA, -23L), 
  class = c("tbl_df", "tbl", "data.frame"), 
  .Names = c("EVENT_ID_CNTY", "COUNTRY", "LATITUDE", "LONGITUDE"))

# acled sf points object
# NOTE(review): `coords` names the x column first and the y column second, so
# for longitude/latitude data the order should be c('LONGITUDE', 'LATITUDE').
# The order used here is the reverse of the one given to sp further down
# (~LONGITUDE+LATITUDE), so the sf points and the sp points end up at
# different locations.
acledsf <- st_as_sf(
  acled,
  coords = c('LATITUDE', 'LONGITUDE'),
  crs = 4326
)

# acled sp points object
# `coordinates<-` turns `acled` itself into a Spatial points object (x =
# LONGITUDE, y = LATITUDE); it is then given the CRS string of `priosp`,
# copied to `acledsp`, and the original name is removed.
coordinates(acled) <- ~LONGITUDE+LATITUDE
  proj4string(acled) <- proj4string(priosp)
acledsp <- acled; rm(acled)

sp包的空间连接结果。我先找出与每个点相交的多边形,将结果绑定到这些点上,然后统计每个CELL_ID(多边形)对应的点数:

# sp spatial join:
# Attach to every point the attributes of the polygon it falls in.
#
# points: a Spatial points object that carries a data slot.
# poly:   a Spatial polygons object with attribute data.
#
# Returns `points` with the polygon attribute columns bound on, one row per
# point. Callers filter on is.na(CELL_ID) afterwards, i.e. points that match
# no polygon are expected to carry NA in the added columns.
addPolyDataToPts <- function(points, poly) {
  # over() looks up, for each point, the attribute row of the polygon at that
  # location.
  polysByPoint <- over(points, poly)
  # Return the combined object directly instead of relying on the invisible
  # value of an assignment to the `points` argument.
  spCbind(points, polysByPoint)
}

acj <- addPolyDataToPts(acledsp, priosp)

# Tally the joined points per polygon cell. Rows whose CELL_ID is NA are
# dropped before counting; the result is ordered by CELL_ID and printed
# (outer parentheses).
(acled_count_sp <- acj@data %>%
  filter(!is.na(CELL_ID)) %>%
  group_by(CELL_ID, prio_country, POP) %>%
  summarise(acled_sp = n()) %>%
  arrange(CELL_ID) %>%
  rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups:   CELL_ID, prio_country_sp [5]
#>   CELL_ID prio_country_sp     POP acled_sp
#>     <dbl> <chr>             <dbl>    <int>
#> 1 140055. Nigeria         527012.        5
#> 2 145866. South Sudan      23006.        6
#> 3 150830. Mali             12169.        4
#> 4 176783. Tunisia         107370.        4
#> 5 180365. Algeria         111984.        4

对应的sf包空间连接结果。除了一个多边形方块(140055;尼日利亚)之外,计数列acled_sf都与上面的acled_sp列不同:

# sf spatial join:
# Join the points onto the polygons that cover them, then count the joined
# rows per cell and print the result ordered by CELL_ID.
# NOTE(review): st_join() appears to keep a polygon without any matching
# point as a single row of NA point attributes, in which case n() reports 1
# rather than 0 for that cell -- confirm against the sf documentation.
(acled_count_sf <- priosf %>%
  st_join(acledsf, join = st_covers) %>%
  group_by(CELL_ID, POP, prio_country) %>%
  summarise(acled_sf = n()) %>%
  ungroup() %>%
  arrange(CELL_ID) %>%
  rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type:  POLYGON
#> dimension:      XY
#> bbox:           xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#>   CELL_ID     POP prio_country_sf acled_sf                        geometry
#>     <dbl>   <dbl> <chr>              <int>                   <POLYGON [°]>
#> 1 140055. 527012. Nigeria                5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2 145866.  23006. South Sudan            4 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3 150830.  12169. Mali                   1 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4 176783. 107370. Tunisia                6 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5 180365. 111984. Algeria                1 ((2 35, 2 35.5, 2.5 35.5, 2.5 …

我目前的猜测是,其中一种方法以错误的顺序绑定了值,但我不确定是哪一种。在我更大的数据集中,我得到了相似的计数,但它们被绑定到了不同的多边形上,即:在sf连接中有“2706”个点与单元1匹配,而在sp连接中这些点与单元2匹配。

(而且,在某些情况下,sf联接中会丢失一些值)

任何对我的结果如何或为什么以这种方式有所不同的见解都将受到赞赏。

1 个答案:

答案 0 :(得分:5)

我花了点时间在mapview中绘制数据,才弄清楚这里发生了什么。至少在给定的reprex中,问题出在创建acledsf对象时把经度和纬度的顺序写反了。按正确的顺序创建之后,两种连接的输出就一致了:

# acled sf points object
# `coords` takes the x column first and the y column second, so longitude
# has to be listed before latitude.
acledsf <- st_as_sf(acled, coords = c("LONGITUDE", "LATITUDE"), crs = 4326)

# acled sp points object
# Move the table to its final name first, then promote it to a Spatial
# points object and give it the same CRS string as the polygons.
acledsp <- acled
rm(acled)
coordinates(acledsp) <- c("LONGITUDE", "LATITUDE")
proj4string(acledsp) <- proj4string(priosp)


# Attach to every point the attributes of the polygon it falls in.
#
# points: a Spatial points object that carries a data slot.
# poly:   a Spatial polygons object with attribute data.
#
# Returns `points` with the polygon attribute columns bound on, one row per
# point. Callers filter on is.na(CELL_ID) afterwards, i.e. points that match
# no polygon are expected to carry NA in the added columns.
addPolyDataToPts <- function(points, poly) {
  # over() looks up, for each point, the attribute row of the polygon at that
  # location.
  polysByPoint <- over(points, poly)
  # Return the combined object directly instead of relying on the invisible
  # value of an assignment to the `points` argument.
  spCbind(points, polysByPoint)
}

acj <- addPolyDataToPts(acledsp, priosp)

# Tally the joined points per polygon cell. Rows whose CELL_ID is NA are
# dropped before counting; the result is ordered by CELL_ID and printed
# (outer parentheses).
(acled_count_sp <- acj@data %>%
    filter(!is.na(CELL_ID)) %>%
    group_by(CELL_ID, prio_country, POP) %>%
    summarise(acled_sp = n()) %>%
    arrange(CELL_ID) %>%
    rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups:   CELL_ID, prio_country_sp [5]
#>   CELL_ID prio_country_sp     POP acled_sp
#>     <dbl> <chr>             <dbl>    <int>
#> 1  140055 Nigeria         527012.        5
#> 2  145866 South Sudan      23006.        6
#> 3  150830 Mali             12169.        4
#> 4  176783 Tunisia         107370.        4
#> 5  180365 Algeria         111984.        4


### sf
# Count the points covered by each polygon cell and print the result ordered
# by CELL_ID. st_join() is a left join by default, so a cell without any
# point still produces one row whose point columns are NA. Counting the
# non-missing event ids instead of the rows gives such a cell 0 rather than 1;
# cells that do contain points get the same count as before.
(acled_count_sf <- 
    st_join(priosf, acledsf, join = st_covers) %>%
    group_by(CELL_ID, prio_country,  POP) %>%
    summarize(acled_sf = sum(!is.na(EVENT_ID_CNTY))) %>%
    ungroup() %>% 
    arrange(CELL_ID) %>%
    rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type:  POLYGON
#> dimension:      XY
#> bbox:           xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#>   CELL_ID prio_country_sf     POP acled_sf                        geometry
#>     <dbl> <chr>             <dbl>    <int>                   <POLYGON [°]>
#> 1  140055 Nigeria         527012.        5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2  145866 South Sudan      23006.        6 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3  150830 Mali             12169.        4 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4  176783 Tunisia         107370.        4 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5  180365 Algeria         111984.        4 ((2 35, 2 35.5, 2.5 35.5, 2.5 …