我有一个数据框 DF_1,其中列出了始发地和目的地城市,我想知道它们之间有多远(英里/公里)。在 DF_2 中,我有各城市之间的距离。我怎样才能根据这两个 DF 得到每对城市之间的距离?
DF_1:
# Routes to look up: one row per (origin, destination) city pair.
origin <- c("LONDON", "NEW YORK", "TOKIO", "LONDON", "RIO DE JANEIRO")
destination <- c(
  "NEW YORK", "NEW YORK", "RIO DE JANEIRO", "LISBON", "MADRID"
)
DF_1 <- data.frame(origin = origin, destination = destination)
DF_2(各城市之间的距离矩阵)如下;我想要的结果是在 DF_1 中为每一行加上对应的距离:
# Distance table: the CITY column labels the rows, and each remaining column
# holds the distances to the city named by that column.
CITY <- c(
  "NEW YORK", "LONDON", "SAN FRANCISCO", "MADRID", "LOS ANGELES",
  "LISBON", "RIO DE JANEIRO", "MOSCOW", "SAO PAULO", "TOKIO"
)
NEW_YORK <- c(0, 700, 250, 1000, 400, 800, 430, 900, 500, 30)
LONDON <- c(700, 0, 350, 1200, 50, 110, 780, 984, 1150, 5)
SAN_FRANCISCO <- c(250, 350, 0, 200, 15, 260, 305, 412, 29, 102)
MADRID <- c(1000, 1200, 200, 0, 77, 115, 225, 318, 412, 511)
LOS_ANGELES <- c(400, 50, 15, 77, 0, 88, 819, 733, 978, 1001)
LISBON <- c(800, 110, 260, 115, 88, 0, 17, 3000, 1418, 735)
RIO_DE_JANEIRO <- c(430, 780, 305, 225, 819, 17, 0, 513, 701, 56)
MOSCOW <- c(900, 984, 412, 318, 733, 3000, 513, 0, 389, 499)
SAO_PAULO <- c(500, 1150, 29, 412, 978, 1418, 701, 389, 0, 1113)
TOKIO <- c(30, 5, 102, 511, 1001, 735, 56, 499, 1113, 0)

# check.names = FALSE keeps the spaces in the column names, so they are
# spelled exactly like the city names stored in CITY.
DF_2 <- data.frame(
  CITY = CITY,
  `NEW YORK` = NEW_YORK,
  LONDON = LONDON,
  `SAN FRANCISCO` = SAN_FRANCISCO,
  MADRID = MADRID,
  `LOS ANGELES` = LOS_ANGELES,
  LISBON = LISBON,
  `RIO DE JANEIRO` = RIO_DE_JANEIRO,
  MOSCOW = MOSCOW,
  `SAO PAULO` = SAO_PAULO,
  TOKIO = TOKIO,
  check.names = FALSE
)
答案 0 :(得分:2)
使用基础 R(base R),您可以这样写:
# One-step lookup: build a copy of the distance table whose row names are
# the city names, then index it with a two-column character matrix of
# (origin, destination) pairs. Only those two columns of DF_1 are used as
# the index, so the lookup keeps working when DF_1 carries extra columns.
transform(
  DF_1,
  distance = `rownames<-`(DF_2[, -1], DF_2[, 1])[
    as.matrix(DF_1[c("origin", "destination")])
  ]
)
origin destination distance
1 LONDON NEW YORK 700
2 NEW YORK NEW YORK 0
3 TOKIO RIO DE JANEIRO 56
4 LONDON LISBON 110
5 RIO DE JANEIRO MADRID 225
分步来看:先创建一个以城市名作为行名的新数据框,再用由 DF_1 的始发地和目的地组成的字符矩阵对它进行索引:
# Copy of the distance table without its first column (CITY).
DF_3 <- DF_2[, -1]
# Label the rows by city so that cells can be addressed by
# (row name, column name).
rownames(DF_3) <- DF_2$CITY
# Index with a two-column character matrix of (origin, destination) names.
# The key columns are selected explicitly so this statement can be re-run:
# as.matrix(DF_1) would also include DISTANCE once it has been added.
DF_1$DISTANCE <- DF_3[as.matrix(DF_1[c("origin", "destination")])]
DF_1
答案 1 :(得分:1)
这是一个使用 base R 的 row/column 索引的方案:
# Position of each origin among the rows (CITY) and of each destination
# among the distance columns (every column except the first, CITY).
i1 <- match(DF_1[["origin"]], DF_2[["CITY"]])
j1 <- match(DF_1[["destination"]], colnames(DF_2)[-1])
# A two-column index matrix picks one cell per (row, column) pair.
DF_1[["distance"]] <- as.matrix(DF_2[-1])[cbind(i1, j1)]
DF_1
# origin destination distance
#1 LONDON NEW YORK 700
#2 NEW YORK NEW YORK 0
#3 TOKIO RIO DE JANEIRO 56
#4 LONDON LISBON 110
#5 RIO DE JANEIRO MADRID 225
答案 2 :(得分:1)
这应该能准确得到您想要的结果(使用 tidyverse):
# Attach the packages this pipeline relies on: %>%, inner_join(), filter()
# and select() come from dplyr; gather() comes from tidyr.
library(dplyr)
library(tidyr)

# Join each route to its origin's row of the distance table, stack the
# distance columns into (city, distance) pairs, and keep only the pair whose
# city equals the route's destination.
# gather() is superseded by tidyr::pivot_longer(); it is kept here so the
# row order of the result does not change.
DF_FINAL <- DF_1 %>%
  inner_join(DF_2, by = c("origin" = "CITY")) %>%
  gather(key = "city", value = "distance", -origin, -destination) %>%
  filter(destination == city) %>%
  select(-city)
DF_FINAL
|origin |destination | distance|
|:--------------|:--------------|--------:|
|LONDON |NEW YORK | 700|
|NEW YORK |NEW YORK | 0|
|RIO DE JANEIRO |MADRID | 225|
|LONDON |LISBON | 110|
|TOKIO |RIO DE JANEIRO | 56|
答案 3 :(得分:1)
我尝试在 tidyverse 框架中进行此操作。第一步是将距离矩阵转换为“长”格式。然后,只需将其与原始的 data.frame 进行连接(join)!
我建议在您的 data.frame() 定义的末尾添加 stringsAsFactors = FALSE,以避免出现警告消息。
library(tidyr)
library(dplyr)

# Reshape the distance table to long format, one row per
# (origin, destination, distance) triple, then right-join it onto DF_1 so
# that every route listed in DF_1 is kept.
# The join keys are spelled out so the join does not depend on which column
# names the two tables happen to share, and no "Joining, by = ..." message
# is printed.
pivot_longer(
  DF_2,
  -CITY,
  names_to = "destination",
  values_to = "distance"
) %>%
  rename(origin = CITY) %>%
  right_join(DF_1, by = c("origin", "destination"))
# A tibble: 5 x 3
origin destination distance
<chr> <chr> <dbl>
1 LONDON NEW YORK 700
2 NEW YORK NEW YORK 0
3 TOKIO RIO DE JANEIRO 56
4 LONDON LISBON 110
5 RIO DE JANEIRO MADRID 225