我已经查看了几个答案,但未能将其应用于我的问题。参见:
Calculating the distance between points in different data frames
Calculating number of points within a certain radius
find locations within certain lat/lon distance in r
find number of points within a radius in R using lon and lat coordinates
Identify points within specified distance in R
我有两个数据框 `loc` 和 `stop`。对于每个 `stop`,我想计算它到每个 `loc` 的距离。
我的位置
# Reference locations: one row per station, with latitude/longitude and
# postcode district.
loc <- data.frame(
  station = c("Baker Street", "Bank"),
  lat = c(51.522236, 51.5134047),
  lng = c(-0.157080, -0.08905843),
  postcode = c("NW1", "EC3V")
)
我的站点(stop)
# Stops whose distance to every location is wanted; same column layout as
# `loc`. NOTE: the name shadows base::stop as a variable (function calls to
# stop() still resolve to the base function).
stop <- data.frame(
  station = c("Angel", "Barbican", "Barons Court", "Bayswater"),
  lat = c(51.53253, 51.520865, 51.490281, 51.51224),
  lng = c(-0.10579, -0.097758, -0.214340, -0.187569),
  postcode = c("EC1V", "EC1A", "W14", "W2")
)
作为最终结果,我想要这样的事情:
# Mock-up of the desired result shape: one row per location/stop pairing,
# with a placeholder ("x") where the computed distance should go.
df <- data.frame(
  loc = rep(c("Baker Street", "Bank"), times = 4),
  stop = rep(c("Angel", "Barbican", "Barons Court", "Bayswater"), times = 2),
  dist = rep("x", times = 8),
  lat = rep(c(51.53253, 51.520865, 51.490281, 51.51224), times = 2),
  lng = rep(c(-0.10579, -0.097758, -0.214340, -0.187569), times = 2),
  postcode = rep(c("EC1V", "EC1A", "W14", "W2"), times = 2)
)
我的数据集比较大,所以我正在寻找一种有效的方法来解决这个问题。
关于如何实现这一目标的任何想法?
答案 0 :(得分:4)
这里使用 `expand.grid` 和 `merge`,再配合一些创造性的变量重命名。写法有些取巧,但由于所有操作都是向量化的,所以效率很高。
library(dplyr)
# Cross every location with every stop, then attach the coordinates of each
# side. Intermediates live inside local() so only `df` is created.
df <- local({
  # All (location, stop) name pairings.
  station_pairs <- expand.grid(station = loc$station, stop = stop$station)

  # Attach the location's coordinates as lat1/lng1, then hand the `station`
  # key over to the stop name so the second merge can reuse it.
  loc_side <- merge(station_pairs, loc, by = "station")
  loc_side <- rename(loc_side,
                     loc = station, lat1 = lat, lng1 = lng, station = stop)
  loc_side <- select(loc_side, -postcode)

  # Attach the stop's coordinates (lat2/lng2) and its postcode.
  both_sides <- merge(loc_side, stop, by = "station")
  rename(both_sides, stop = station, lat2 = lat, lng2 = lng)
})
# stop loc lat1 lng1 lat2 lng2 postcode
# 1 Angel Baker Street 51.52224 -0.15708000 51.53253 -0.105790 EC1V
# 2 Angel Bank 51.51340 -0.08905843 51.53253 -0.105790 EC1V
# 3 Barbican Baker Street 51.52224 -0.15708000 51.52087 -0.097758 EC1A
# 4 Barbican Bank 51.51340 -0.08905843 51.52087 -0.097758 EC1A
# 5 Barons Court Baker Street 51.52224 -0.15708000 51.49028 -0.214340 W14
# 6 Barons Court Bank 51.51340 -0.08905843 51.49028 -0.214340 W14
# 7 Bayswater Baker Street 51.52224 -0.15708000 51.51224 -0.187569 W2
# 8 Bayswater Bank 51.51340 -0.08905843 51.51224 -0.187569 W2
然后我们可以使用 `geosphere::distHaversine()`(受 Jacob 的回答启发),按 Haversine 公式计算距离。
# Haversine distance between each location (lng1, lat1) and stop (lng2, lat2).
# geosphere takes points as longitude first, then latitude.
df$dist_meters <- geosphere::distHaversine(
  df[, c("lng1", "lat1")],
  df[, c("lng2", "lat2")]
)
# Show just the pairing and its distance.
select(df, stop, loc, dist_meters)
# stop loc dist_meters
# 1 Angel Baker Street 3732.422
# 2 Angel Bank 2423.989
# 3 Barbican Baker Street 4111.786
# 4 Barbican Bank 1026.091
# 5 Barons Court Baker Street 5328.649
# 6 Barons Court Bank 9054.998
# 7 Bayswater Baker Street 2387.231
# 8 Bayswater Bank 6825.897
如果您好奇 Haversine 公式是如何运作的,可以像下面这样手动计算:
# Haversine formula computed by hand, for comparison with geosphere.
# All angles are converted from degrees to radians first.
latrad1 <- df$lat1 * pi / 180
latrad2 <- df$lat2 * pi / 180
# `df` has no dlat/dlng columns, so the coordinate differences are derived
# here from the lat1/lat2 and lng1/lng2 columns.
dlat <- (df$lat2 - df$lat1) * pi / 180
dlng <- (df$lng2 - df$lng1) * pi / 180
# a = sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlng / 2)
a <- sin(dlat / 2)^2 + sin(dlng / 2)^2 * cos(latrad1) * cos(latrad2)
# Central angle between the two points, in radians.
dist_rad <- 2 * atan2(sqrt(a), sqrt(1 - a))
df %>%
  # 6378137 is the sphere radius in meters (the default `r` of
  # geosphere::distHaversine), so both columns are directly comparable.
  mutate(dist_meters_byhand = dist_rad * 6378137) %>%
  select(stop, loc, dist_meters_geosphere = dist_meters, dist_meters_byhand)
# stop loc dist_meters_geosphere dist_meters_byhand
# 1 Angel Baker Street 3732.422 3732.422
# 2 Angel Bank 2423.989 2423.989
# 3 Barbican Baker Street 4111.786 4111.786
# 4 Barbican Bank 1026.091 1026.091
# 5 Barons Court Baker Street 5328.649 5328.649
# 6 Barons Court Bank 9054.998 9054.998
# 7 Bayswater Baker Street 2387.231 2387.231
# 8 Bayswater Bank 6825.897 6825.897
答案 1 :(得分:0)
不如 @Ben 的方法巧妙(可能也没那么快),但这是另一种做法:
library(geosphere)
# For every location, compute the distance from each stop to that location
# and stack the per-location results into one data frame (`master_df`) with
# the columns of `stop` plus `loc` (location name) and `dist`.
per_loc <- lapply(seq_len(nrow(loc)), function(i) {
  cbind(
    stop,
    data.frame(
      loc = loc$station[i],
      # geosphere expects points as (longitude, latitude); select the columns
      # by name so the order is explicit rather than positional.
      # distm() returns a one-column matrix here; flatten it to a vector.
      # The distance function is left at distm()'s default; pass `fun =` to
      # choose one explicitly.
      dist = as.vector(distm(
        as.matrix(stop[, c("lng", "lat")]),
        c(loc$lng[i], loc$lat[i])
      ))
    )
  )
})
# Bind once instead of growing the data frame inside a loop; the leading
# empty data frame keeps the result a data.frame even when `loc` has no rows.
master_df <- do.call(rbind, c(list(data.frame()), per_loc))
geosphere 包默认使用 Haversine 公式;如果需要更高的精度,这可能很有用。