通过以下起点和终点数据,我们如何获得2点之间的路线。
> ddf
start end
1 a b
2 a c
3 b e
4 b f
5 b c
6 a d
7 e f
8 f g
> dput(ddf)
structure(list(start = structure(c(1L, 1L, 2L, 2L, 2L, 1L, 3L,
4L), .Label = c("a", "b", "e", "f"), class = "factor"), end = structure(c(1L,
2L, 4L, 5L, 2L, 3L, 5L, 6L), .Label = c("b", "c", "d", "e", "f",
"g"), class = "factor")), .Names = c("start", "end"), class = "data.frame", row.names = c(NA,
-8L))
>
此页面(http://www.anselm.edu/homepage/mmalita/culpro/graf1.html)仅显示Prolog中的2行解决方案!以下代码有效,但没有给出正确的输出列表。它可以用mainpath(ddf,' a' f')开始,找到' a'之间的路径。并且' f'。
mainpath = function(ddf, startpt, endpt){
ddf$start = as.character(ddf$start)
ddf$end = as.character(ddf$end)
nodenames = sort(unique(c(ddf$start, ddf$end)))
routev=c(startpt)
if(is_arc(ddf, startpt, endpt, routev)) {return("Direct route found."); }
else{
nodenames = nodenames[which(nodenames!=startpt)]
nodenames = nodenames[which(nodenames!=endpt )]
if(is_path(ddf, nodenames, startpt, endpt, routev))
return ("Completed successfully")
else return ("Could not find a path.")
}
}
is_arc = function(ddf, frompt, topt, routevector){
len = nrow(ddf)
for(i in 1:len)
if(frompt == ddf$start[i] && topt == ddf$end[i]) {
routevector = append(routevector, frompt)
routevector = append(routevector, topt)
print(routevector)
return (TRUE);
}
return (FALSE)
}
is_path = function(ddf, othernodes, frompt, topt, routevector){
if(is_arc(ddf, frompt, topt, routevector)){
return (TRUE)
}
if(length(othernodes)==0){
print(routevector)
return (FALSE)
}
for(i in 1:length(othernodes)){
intermediate = othernodes[i]
if(is_arc(ddf, frompt, intermediate, routevector) && is_path(ddf, othernodes, intermediate, topt, routevector)){
return (TRUE)
}
}
print(routevector)
return (FALSE)
}
我确信它可以得到很大的改进,特别是所有这些for循环等都可以使用apply etc函数来删除。我知道有这些功能的软件包可用,但如何在基础R中完成? 您的回答/意见将不胜感激。
答案 0 :(得分:1)
虽然我确定使用线性代数有很好的方法可以做到这一点,但这是一个相对直观的方法(在这里使用dplyr
,但是你喜欢翻译):
library(dplyr)
# convert factors to characters, filter down to possible starting points
df %>% mutate_each(funs(as.character)) %>% filter(start == 'a') %>%
# join to add possible next steps, indexing endpoints to startpoints
left_join(df, by = c('end' = 'start')) %>%
# iterate for successive steps
left_join(df, by = c('end.y' = 'start')) %>%
left_join(df, by = c('end.y.y' = 'start')) %>%
# chop out rows that didn't end at 'g' (omit if you're curious)
filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')
# start end.x end.y end.y.y end
# 1 a b e f g
# 2 a b f g <NA>
如果df
是因素,您会收到有关强制执行的警告,但它会正常运行(强制启动或添加%>% mutate_each(funs(as.character))
到每个df
来电,他们会离开)。列名有点难看;如果您愿意,可以使用left_join
suffix
参数或select
或rename
设置它们。
显然,连接的迭代会引发一个循环,这可能是这样的:
df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')
for(i in 0:2){
endcol <- paste0('end', paste(rep('.y', i), collapse = ''))
df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
}
df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')
# start end.x end.y end.y.y end
# 1 a b e f g
# 2 a b f g <NA>
如果你设置的迭代次数太高,它就会输出错误,因为没有要连接的行,但错误实际上非常方便,因为循环已经保存了你想要的df2
,所以错误只会阻止额外的工作完成。如果你愿意,可以添加tryCatch
,或者转向另一个方向并将其重构为一个看起来很可怕的while
循环,它实际上会迭代完美的次数:
df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')
endcol <- 'end' # initialize iterating variable
while(TRUE){
df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
endcol <- paste0(endcol, '.y')
}
df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')
# start end.x end.y end.y.y end
# 1 a b e f g
# 2 a b f g <NA>
答案 1 :(得分:0)
以下是更短且易于理解的,使用基数R的递归函数。如果发送的data.frame的开始和结束列已经是字符而不是因子,则不需要前2行。
mainpath2 = function(ddf, startpt, endpt, route=c()){
ddf$start = as.character(ddf$start)
ddf$end = as.character(ddf$end)
if(startpt == endpt) return("Error: Same Start and End points.\n")
for(i in 1:nrow(ddf)){
if(ddf$start[i] == startpt){
route = append(route, startpt)
if(ddf$end[i] == endpt){
# PATH FOUND:
route = append(route, endpt)
print(route)
}
else mainpath2(ddf[-i, ], ddf$end[i], endpt, route)
route = route[-length(route)]
}
}
}
> mainpath2(ddf, 'a', 'g')
[1] "a" "b" "e" "f" "g"
[1] "a" "b" "f" "g"