我正在尝试计算点与每个点最近的多边形之间的距离。我当前正在使用函数st_distance(library sf),这似乎是最快的方法。但是仍然需要很多时间。 这就是为什么我想将我正在使用的循环更改为应用过程或以更快的方式进行此操作的原因。有人可以帮我做那个吗? 谢谢!
## Importation of shapefiles
# library(rgdal)
# pathToShp = "J:/shp_files/"
# points = readOGR(dsn = pathToShp, layer="points_2154", stringsAsFactors=FALSE) #Points in EPSG 2154 Lambert
# polygons = readOGR(dsn = pathToShp, layer="polygons_2154", stringsAsFactors=FALSE) #Polygons
library(sf)
# points_sf = st_as_sf(points)
# polygons_sf = st_as_sf(polygons)
## Search the closest polygon for each point
point_polygon <- c()
point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature) # ID of the closest polygon for each point
## Distance between each point and the closest polygon
dist_sf <- c()
for (i in 1:nrow(points_sf)) {
dist_sf[i] = st_distance(points_sf[i,],
polygons_sf[polygons_sf$ID == point_polygon$ID[i], ],
by_element = TRUE)
}
您应该获取:
dist_sf
# [1] 514830.0 260656.0 260647.7 260653.5 262053.6
points_sf <- structure(list(field_1 = c("1", "2", "3", "4", "5"), adresse = c("6 RUE DES VIGNES, 40140 SOUSTONS, France",
"22 RUE DE PARIS, 03000 MOULINS, France", "5 RUE REGNAUDIN, 03000 MOULINS, France",
"31 RUE DE PARIS, 03000 MOULINS, France", "15 RUE DES RAMIERS, 85360 LA TRANCHE SUR MER, France"
), latitude = c(43.75395, 46.56875, 46.56893, 46.56873, 46.35638
), longitude = c(-1.31277, 3.330296, 3.330394, 3.330224, -1.470842
), geometry = structure(list(structure(c(352768.516216819, 6304476.86420524
), class = c("XY", "POINT", "sfg")), structure(c(725298.307259582,
6607688.02981763), class = c("XY", "POINT", "sfg")), structure(c(725305.729670888,
6607708.05130113), class = c("XY", "POINT", "sfg")), structure(c(725292.801896427,
6607685.78563239), class = c("XY", "POINT", "sfg")), structure(c(356412.817813797,
6593779.89675049), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 352768.516216819,
ymin = 6304476.86420524, xmax = 725305.729670888, ymax = 6607708.05130113
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(field_1 = NA_integer_,
adresse = NA_integer_, latitude = NA_integer_, longitude = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA,
5L), class = c("sf", "data.frame"))
polygons_sf <- structure(list(ID = c("M1204300", "E6490620", "E4240850"), geometry = structure(list(
structure(list(structure(c(533957.599997006, 534047.299997008,
534171.89999701, 534191.69999701, 534226.099997011, 534270.599997012,
534325.099997013, 534369.799997014, 534449.399997015, 534549.199997017,
534674.099997019, 534924.099997023, 535084.299997026, 535174.199997028,
535239.099997029, 535293.89999703, 535323.599997031, 6786523.09989417,
6786492.39989417, 6786461.39989417, 6786436.19989417, 6786370.99989417,
6786305.69989417, 6786255.19989417, 6786219.89989417, 6786184.19989417,
6786163.39989417, 6786162.39989417, 6786185.29989417, 6786218.89989417,
6786218.19989417, 6786207.69989417, 6786182.19989417, 6786156.99989417
), .Dim = c(17L, 2L))), class = c("XY", "MULTILINESTRING",
"sfg")), structure(list(structure(c(608743.099998312, 608792.899998313,
608827.799998314, 608847.799998314, 608867.999998314, 608918.699998315,
608974.499998316, 609015.299998317, 609071.299998318, 609086.499998318,
609106.399998319, 609156.19999832, 609181.59999832, 609197.09999832,
609202.299998321, 609217.599998321, 609257.999998322, 609273.299998322,
609324.099998323, 609354.699998323, 7003205.49989546, 7003185.09989545,
7003164.79989545, 7003169.69989546, 7003194.49989546, 7003278.99989546,
7003378.49989546, 7003478.09989546, 7003597.59989546, 7003623.39989546,
7003618.29989546, 7003592.89989546, 7003642.59989546, 7003702.49989546,
7003732.39989546, 7003767.29989546, 7003816.89989546, 7003856.79989546,
7003946.29989546, 7004020.99989546), .Dim = c(20L, 2L))), class = c("XY",
"MULTILINESTRING", "sfg")), structure(list(structure(c(669193.399999424,
669183.499999424, 669153.399999423, 669097.999999422, 669077.999999422,
669048.599999421, 7097101.79989609, 7097111.89989609, 7097102.09989609,
7097047.59989609, 7097052.79989609, 7097123.99989609), .Dim = c(6L,
2L)), structure(c(669048.599999421, 669022.899999421, 668953.19999942,
668888.899999418, 668854.499999418, 668809.899999417, 668790.299999417,
668740.899999416, 668721.199999415, 668656.799999414, 668637.199999414,
668618.099999413, 7097123.99989609, 7097149.19989609, 7097189.79989609,
7097265.29989609, 7097340.49989609, 7097385.89989609, 7097430.99989609,
7097496.39989609, 7097532.59989609, 7097598.09989609, 7097653.1998961,
7097758.39989609), .Dim = c(12L, 2L)), structure(c(668618.099999413,
668598.799999413, 668553.799999412, 668519.299999411, 668435.09999941,
668335.899999408, 668159.599999405, 7097758.39989609, 7097833.49989609,
7097949.7998961, 7098010.0998961, 7098095.7998961, 7098191.4998961,
7098459.3998961), .Dim = c(7L, 2L))), class = c("XY", "MULTILINESTRING",
"sfg"))), class = c("sfc_MULTILINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = 533957.599997006,
ymin = 6786156.99989417, xmax = 669193.399999424, ymax = 7098459.3998961
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 0:2, class = c("sf",
"data.frame"), sf_column = "geometry", agr = structure(c(ID = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")))
答案 0 :(得分:2)
此:
apply(st_distance(points_sf, polygons_sf), 1, min)
似乎是最快的选择。虽然本机sf版本的运行速度并不慢。 实际时间请见下文
library(microbenchmark)
microbenchmark(
loop = {
point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)
## Distance between each point and the closest polygon
dist_sf <- c()
for (i in 1:nrow(points_sf)) {
dist_sf[i] = st_distance(points_sf[i,],
polygons_sf[polygons_sf$ID == point_polygon$ID[i], ],
by_element = TRUE)
}
},
apply = { apply(st_distance(points_sf, polygons_sf), 1, min) },
native = {
polys = polygons_sf[st_nearest_feature(points_sf, polygons_sf), ]
st_length(st_nearest_points(points_sf, polys, pairwise = TRUE))
},
dt = {
dist = as.data.table(st_distance(points_sf, polygons_sf))
dist[, pmin(V1, V2, V3)]
},
times = 10
)
Unit: milliseconds
expr min lq mean median uq max neval cld
loop 29.2660 30.36030 32.092494 30.95950 32.97390 42.5732 100 d
apply 2.7579 2.90365 3.124069 2.96670 3.20515 5.0635 100 a
native 3.9875 4.13340 4.566414 4.24310 4.55095 11.9232 100 c
dt 3.4089 3.57920 3.838198 3.66055 3.93795 8.6983 100 b