我有两个数据表。一个包含有关房屋的信息,其中有两列坐标(一列为东向坐标,一列为北向坐标)。另一个包含有关车站的信息,同样有两列坐标。坐标采用瑞士 LV95 格式。
> head(d.small)
id GKODE GKODN
1: 100 2608850 1262583
2: 10000012 2612952 1267232
3: 11776807 2612952 1267232
4: 10000061 2623360 1247413
5: 10000223 2622938 1259411
6: 9997602 2609194 1262383
> head(haltestelle.small)
y_Koord_Ost x_Koord_Nord
1: 2633061 1257737
2: 2630544 1252831
3: 2628497 1256841
4: 2629649 1255767
5: 2619156 1264531
6: 2619741 1247012
现在我需要到最近的车站的距离。
下面的代码计算一所房子与所有车站之间的距离,取其中的最小距离,并记录对应车站的索引。
不幸的是,我的代码太慢了。如何向量化循环?
# Result table with one row per house: column 1 is meant to hold the distance
# to the nearest station, column 2 the row index of that station. Both columns
# are initialised to 1.
dist.oev <- data.table(dist.oev=rep(1, nrow(d.small)), dist.oev.index=rep(1, nrow(d.small)))
# Outer loop: one pass per house (row of d.small).
for (i in 1:nrow(d.small)) {
# Progress output: prints the current house row number.
cat(i, " ")
# Inner loop: one pass per station (row of haltestelle.small).
for ( j in 1:nrow(haltestelle.small)) {
# Coordinate differences between house i and station j only.
# NOTE(review): `.()` in j presumably returns a one-row data.table rather than
# a plain number - confirm.
diff.ost <- d.small[i, .(GKODE)] - haltestelle.small[j, .(y_Koord_Ost)]
diff.nord <- d.small[i, .(GKODN)] - haltestelle.small[j, .(x_Koord_Nord)]
# NOTE(review): both cells of row i are overwritten on every pass of j, and
# min()/which.min() are applied to the single (i, j) pair. It looks like the
# value left after the inner loop belongs to the last station rather than the
# nearest one - confirm.
dist.oev[i,1] <- min(sqrt(diff.ost^2 + diff.nord^2))
dist.oev[i,2] <- which.min(sqrt(diff.ost^2 + diff.nord^2))
}
}
答案 0 :(得分:1)
这是一个解决方案,只要数据能放进内存(RAM),它就应该相对较快。我提供了一个可重现的示例,尽管其中用的并不是您的确切数据。`h` 包含车站坐标,而 `d` 包含房屋坐标:
# Toy data standing in for the real tables: `h` holds the station
# coordinates, `d` holds the house coordinates.
h <- data.frame(x = rnorm(1000), y = rnorm(1000))
d <- data.frame(x = rnorm(500), y = rnorm(500))

# Squared coordinate differences for every house/station pair. outer() is
# vectorised and returns a matrix with one row per house (d) and one column
# per station (h), the same layout the nested sapply() calls produced.
xdiff <- outer(d$x, h$x, "-")^2
ydiff <- outer(d$y, h$y, "-")^2

# Squared Euclidean distance matrix (houses x stations).
dist2 <- xdiff + ydiff

# Column index of the nearest station for each house (row-wise which.min).
closest <- apply(dist2, 1, which.min)

# Pick element (i, closest[i]) from each row and take the square root only
# there. The row index is derived from the data rather than hard-coded, so the
# code works for any number of houses.
min.dist <- sqrt(dist2[cbind(seq_len(nrow(d)), closest)])
代码首先计算x中所有平方差,然后计算y中所有平方差,并将它们相加以获得矩阵中的平方欧几里德距离。
答案 1 :(得分:1)
这可能是您想要的:
# Add column `mindist` by reference: the Euclidean distance from each house
# to its nearest station.
# Grouping is done per row instead of `by = id`, so GKODE/GKODN are always
# scalars inside a group and recycle against the full station vectors. With
# `by = id`, a duplicated id would turn them into vectors that are recycled
# element-wise against the stations, giving wrong distances.
d.small[,
        mindist := sqrt(min(
          (GKODE - haltestelle.small[["y_Koord_Ost"]])^2 +
            (GKODN - haltestelle.small[["x_Koord_Nord"]])^2
        )),
        by = seq_len(nrow(d.small))]
# id GKODE GKODN mindist
# 1: 100 2608850 1262583 10488.486
# 2: 10000012 2612952 1267232 6766.463
# 3: 11776807 2612952 1267232 6766.463
# 4: 10000061 2623360 1247413 3641.148
# 5: 10000223 2622938 1259411 6124.327
# 6: 9997602 2609194 1262383 10190.944
数据(可复制格式):
# Houses: an id plus the two coordinate columns GKODE and GKODN, parsed by
# fread() from the inline text below.
d.small <- fread("id GKODE GKODN
100 2608850 1262583
10000012 2612952 1267232
11776807 2612952 1267232
10000061 2623360 1247413
10000223 2622938 1259411
9997602 2609194 1262383")
# Stations: the two coordinate columns y_Koord_Ost and x_Koord_Nord. The
# distance code pairs y_Koord_Ost with GKODE and x_Koord_Nord with GKODN.
haltestelle.small <- fread("y_Koord_Ost x_Koord_Nord
2633061 1257737
2630544 1252831
2628497 1256841
2629649 1255767
2619156 1264531
2619741 1247012")
只需稍作修改,您原来的循环代码也能正确运行:
# Fill `dist.oev` row by row: column 1 gets the distance from house i to its
# nearest station, column 2 the row index of that station.

# The station coordinates do not change inside the loop, so extract them once.
station.ost <- haltestelle.small[["y_Koord_Ost"]]
station.nord <- haltestelle.small[["x_Koord_Nord"]]

# seq_len() yields an empty sequence for a zero-row table, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(d.small))) {
  diff.ost <- d.small[i, GKODE] - station.ost
  diff.nord <- d.small[i, GKODN] - station.nord
  # Squared distances from house i to every station, computed once and reused.
  dist2 <- diff.ost^2 + diff.nord^2
  dist.oev[i, 1] <- sqrt(min(dist2)) # take sqrt outside min for efficiency
  dist.oev[i, 2] <- which.min(dist2) # sqrt unnecessary (monotonic transformation)
}
还可以通过以下方式获取索引:
# Add two columns by reference: `mindist` (distance to the nearest station)
# and `mindist_index` (row index of that station in haltestelle.small).
# Grouping is done per row instead of `by = id`, so GKODE/GKODN are scalars in
# every group even when an id occurs more than once.
d.small[,
        c("mindist", "mindist_index") := {
          # Squared distances from this house to every station. Named `dist2`
          # to avoid shadowing stats::dist().
          dist2 <- (GKODE - haltestelle.small[["y_Koord_Ost"]])^2 +
            (GKODN - haltestelle.small[["x_Koord_Nord"]])^2
          # sqrt is monotonic, so it is applied to the minimum only.
          .(sqrt(min(dist2)), which.min(dist2))
        },
        by = seq_len(nrow(d.small))]
答案 2 :(得分:0)
使用 `sf` 软件包的解决方案:
(由于我对经纬度和坐标参考系不太熟悉,我不确定 LV95 格式对应的 CRS;下面的代码假定它是 EPSG:2056。)
# Sample data
library(data.table)
# Houses: an id plus the two coordinate columns GKODE and GKODN, parsed by
# fread() from the inline text below.
d.small <- fread("id GKODE GKODN
100 2608850 1262583
10000012 2612952 1267232
11776807 2612952 1267232
10000061 2623360 1247413
10000223 2622938 1259411
9997602 2609194 1262383")
# Stations: the two coordinate columns y_Koord_Ost and x_Koord_Nord.
haltestelle.small <- fread("y_Koord_Ost x_Koord_Nord
2633061 1257737
2630544 1252831
2628497 1256841
2629649 1255767
2619156 1264531
2619741 1247012")
# Create spatial objects
# Both tables become point layers in CRS EPSG:2056, built from their two
# coordinate columns.
library(sf)
d.sf <- d.small %>%
  sf::st_as_sf(coords = c("GKODE", "GKODN"), crs = 2056)
haltestelle.sf <- haltestelle.small %>%
  sf::st_as_sf(coords = c("y_Koord_Ost", "x_Koord_Nord"), crs = 2056)

# Calculate nearest haltestelle for each d.small
# `np` is the row index of the nearest station in haltestelle.sf, so the
# distance must be measured to haltestelle.sf[np, ]. Indexing d.sf with `np`
# would measure the distance to another house instead.
# by_element = TRUE pairs each house with its own nearest station and returns
# one distance per house rather than a full distance matrix.
d.sf %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(
    np = sf::st_nearest_feature(geometry, haltestelle.sf),
    dist_np = as.numeric(
      sf::st_distance(geometry, haltestelle.sf[np, ], by_element = TRUE)
    )
  ) %>%
  dplyr::ungroup()
#         id    np dist_np          geometry
#      <int> <int>   <dbl>       <POINT [m]>
# 1      100     5  10488. (2608850 1262583)
# 2 10000012     5   6766. (2612952 1267232)
# 3 11776807     5   6766. (2612952 1267232)
# 4 10000061     6   3641. (2623360 1247413)
# 5 10000223     3   6124. (2622938 1259411)
# 6  9997602     5  10191. (2609194 1262383)