R:在这种情况下如何使用应用来加速功能?

时间:2019-06-27 09:08:27

标签: r dataframe apply distance sf

我正在尝试计算点与每个点最近的多边形之间的距离。我当前正在使用函数st_distance(library sf),这似乎是最快的方法。但是仍然需要很多时间。 这就是为什么我想将我正在使用的循环更改为应用过程或以更快的方式进行此操作的原因。有人可以帮我做那个吗? 谢谢!

## Importation of shapefiles
# library(rgdal)
# pathToShp = "J:/shp_files/"
# points = readOGR(dsn = pathToShp, layer="points_2154", stringsAsFactors=FALSE)   #Points in EPSG 2154 Lambert
# polygons = readOGR(dsn = pathToShp, layer="polygons_2154", stringsAsFactors=FALSE)   #Polygons

library(sf)
# points_sf = st_as_sf(points)
# polygons_sf = st_as_sf(polygons)

## Search the closest polygon for each point
point_polygon <- c()
point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)     # ID of the closest polygon for each point

## Distance between each point and the closest polygon
dist_sf <- c()
for (i in 1:nrow(points_sf)) {
  dist_sf[i] = st_distance(points_sf[i,], 
                           polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                           by_element = TRUE)    
}

您应该获取:

dist_sf
# [1] 514830.0 260656.0 260647.7 260653.5 262053.6

数据

points_sf <- structure(list(field_1 = c("1", "2", "3", "4", "5"), adresse = c("6 RUE DES VIGNES, 40140 SOUSTONS, France", 
"22 RUE DE PARIS, 03000 MOULINS, France", "5 RUE REGNAUDIN, 03000 MOULINS, France", 
"31 RUE DE PARIS, 03000 MOULINS, France", "15 RUE DES RAMIERS, 85360 LA TRANCHE SUR MER, France"
), latitude = c(43.75395, 46.56875, 46.56893, 46.56873, 46.35638
), longitude = c(-1.31277, 3.330296, 3.330394, 3.330224, -1.470842
), geometry = structure(list(structure(c(352768.516216819, 6304476.86420524
), class = c("XY", "POINT", "sfg")), structure(c(725298.307259582, 
6607688.02981763), class = c("XY", "POINT", "sfg")), structure(c(725305.729670888, 
6607708.05130113), class = c("XY", "POINT", "sfg")), structure(c(725292.801896427, 
6607685.78563239), class = c("XY", "POINT", "sfg")), structure(c(356412.817813797, 
6593779.89675049), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 352768.516216819, 
ymin = 6304476.86420524, xmax = 725305.729670888, ymax = 6607708.05130113
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(field_1 = NA_integer_, 
adresse = NA_integer_, latitude = NA_integer_, longitude = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA, 
5L), class = c("sf", "data.frame"))

polygons_sf <- structure(list(ID = c("M1204300", "E6490620", "E4240850"), geometry = structure(list(
    structure(list(structure(c(533957.599997006, 534047.299997008, 
    534171.89999701, 534191.69999701, 534226.099997011, 534270.599997012, 
    534325.099997013, 534369.799997014, 534449.399997015, 534549.199997017, 
    534674.099997019, 534924.099997023, 535084.299997026, 535174.199997028, 
    535239.099997029, 535293.89999703, 535323.599997031, 6786523.09989417, 
    6786492.39989417, 6786461.39989417, 6786436.19989417, 6786370.99989417, 
    6786305.69989417, 6786255.19989417, 6786219.89989417, 6786184.19989417, 
    6786163.39989417, 6786162.39989417, 6786185.29989417, 6786218.89989417, 
    6786218.19989417, 6786207.69989417, 6786182.19989417, 6786156.99989417
    ), .Dim = c(17L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg")), structure(list(structure(c(608743.099998312, 608792.899998313, 
    608827.799998314, 608847.799998314, 608867.999998314, 608918.699998315, 
    608974.499998316, 609015.299998317, 609071.299998318, 609086.499998318, 
    609106.399998319, 609156.19999832, 609181.59999832, 609197.09999832, 
    609202.299998321, 609217.599998321, 609257.999998322, 609273.299998322, 
    609324.099998323, 609354.699998323, 7003205.49989546, 7003185.09989545, 
    7003164.79989545, 7003169.69989546, 7003194.49989546, 7003278.99989546, 
    7003378.49989546, 7003478.09989546, 7003597.59989546, 7003623.39989546, 
    7003618.29989546, 7003592.89989546, 7003642.59989546, 7003702.49989546, 
    7003732.39989546, 7003767.29989546, 7003816.89989546, 7003856.79989546, 
    7003946.29989546, 7004020.99989546), .Dim = c(20L, 2L))), class = c("XY", 
    "MULTILINESTRING", "sfg")), structure(list(structure(c(669193.399999424, 
    669183.499999424, 669153.399999423, 669097.999999422, 669077.999999422, 
    669048.599999421, 7097101.79989609, 7097111.89989609, 7097102.09989609, 
    7097047.59989609, 7097052.79989609, 7097123.99989609), .Dim = c(6L, 
    2L)), structure(c(669048.599999421, 669022.899999421, 668953.19999942, 
    668888.899999418, 668854.499999418, 668809.899999417, 668790.299999417, 
    668740.899999416, 668721.199999415, 668656.799999414, 668637.199999414, 
    668618.099999413, 7097123.99989609, 7097149.19989609, 7097189.79989609, 
    7097265.29989609, 7097340.49989609, 7097385.89989609, 7097430.99989609, 
    7097496.39989609, 7097532.59989609, 7097598.09989609, 7097653.1998961, 
    7097758.39989609), .Dim = c(12L, 2L)), structure(c(668618.099999413, 
    668598.799999413, 668553.799999412, 668519.299999411, 668435.09999941, 
    668335.899999408, 668159.599999405, 7097758.39989609, 7097833.49989609, 
    7097949.7998961, 7098010.0998961, 7098095.7998961, 7098191.4998961, 
    7098459.3998961), .Dim = c(7L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg"))), class = c("sfc_MULTILINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = 533957.599997006, 
ymin = 6786156.99989417, xmax = 669193.399999424, ymax = 7098459.3998961
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 0:2, class = c("sf", 
"data.frame"), sf_column = "geometry", agr = structure(c(ID = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")))

1 个答案:

答案 0 :(得分:2)

此:

apply(st_distance(points_sf, polygons_sf), 1, min)

似乎是最快的选择。虽然本机sf版本的运行速度并不慢。 实际时间请见下文

library(microbenchmark)

microbenchmark(
    loop = {
        point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)
        ## Distance between each point and the closest polygon
        dist_sf <- c()
        for (i in 1:nrow(points_sf)) {
            dist_sf[i] = st_distance(points_sf[i,], 
                                     polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                                     by_element = TRUE)    
        }
    },
    apply = { apply(st_distance(points_sf, polygons_sf), 1, min) },
    native = {
        polys = polygons_sf[st_nearest_feature(points_sf, polygons_sf), ]
        st_length(st_nearest_points(points_sf, polys, pairwise = TRUE))
    },
    dt = {
        dist = as.data.table(st_distance(points_sf, polygons_sf))
        dist[, pmin(V1, V2, V3)]
    },
    times = 10
)


Unit: milliseconds
   expr     min       lq      mean   median       uq     max neval  cld
   loop 29.2660 30.36030 32.092494 30.95950 32.97390 42.5732   100    d
  apply  2.7579  2.90365  3.124069  2.96670  3.20515  5.0635   100 a   
 native  3.9875  4.13340  4.566414  4.24310  4.55095 11.9232   100   c 
     dt  3.4089  3.57920  3.838198  3.66055  3.93795  8.6983   100  b