Question

通过以下起点和终点数据，我们如何获得2点之间的路线。

> ddf
  start end
1     a   b
2     a   c
3     b   e
4     b   f
5     b   c
6     a   d
7     e   f
8     f   g

> dput(ddf)
structure(list(start = structure(c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 
4L), .Label = c("a", "b", "e", "f"), class = "factor"), end = structure(c(1L, 
2L, 4L, 5L, 2L, 3L, 5L, 6L), .Label = c("b", "c", "d", "e", "f", 
"g"), class = "factor")), .Names = c("start", "end"), class = "data.frame", row.names = c(NA, 
-8L))
>

此页面（http://www.anselm.edu/homepage/mmalita/culpro/graf1.html）仅显示Prolog中的2行解决方案！以下代码有效，但没有给出正确的输出列表。它可以用mainpath（ddf，＆＃39; a＆＃39; f＆＃39;）开始，找到＆＃39; a＆＃39;之间的路径。并且＆＃39; f＆＃39;。

mainpath = function(ddf, startpt, endpt){
    ddf$start = as.character(ddf$start)
    ddf$end = as.character(ddf$end)
    nodenames = sort(unique(c(ddf$start, ddf$end)))
    routev=c(startpt)
    if(is_arc(ddf, startpt, endpt, routev)) {return("Direct route found."); }
    else{
        nodenames = nodenames[which(nodenames!=startpt)]
        nodenames = nodenames[which(nodenames!=endpt  )]
        if(is_path(ddf, nodenames, startpt, endpt, routev)) 
            return ("Completed successfully")
        else return ("Could not find a path.")
    }
}

is_arc = function(ddf, frompt, topt, routevector){
    len = nrow(ddf)
    for(i in 1:len)     
        if(frompt == ddf$start[i] && topt == ddf$end[i]) {
            routevector = append(routevector, frompt)
            routevector = append(routevector, topt)
            print(routevector)
            return (TRUE); 
        }
    return (FALSE)
}

is_path = function(ddf, othernodes, frompt, topt, routevector){

    if(is_arc(ddf, frompt, topt, routevector)){
        return (TRUE)
    } 
    if(length(othernodes)==0){
        print(routevector)
        return (FALSE)
    }
    for(i in 1:length(othernodes)){
        intermediate = othernodes[i]
        if(is_arc(ddf, frompt, intermediate, routevector) && is_path(ddf, othernodes, intermediate, topt, routevector)){
            return (TRUE)
        }
    }
    print(routevector)
    return (FALSE)
}

我确信它可以得到很大的改进，特别是所有这些for循环等都可以使用apply etc函数来删除。我知道有这些功能的软件包可用，但如何在基础R中完成？您的回答/意见将不胜感激。

Answer 1

虽然我确定使用线性代数有很好的方法可以做到这一点，但这是一个相对直观的方法（在这里使用dplyr，但是你喜欢翻译）：

library(dplyr)

# convert factors to characters, filter down to possible starting points
df %>% mutate_each(funs(as.character)) %>% filter(start == 'a') %>% 
    # join to add possible next steps, indexing endpoints to startpoints
    left_join(df, by = c('end' = 'start')) %>%
    # iterate for successive steps
    left_join(df, by = c('end.y' = 'start')) %>%
    left_join(df, by = c('end.y.y' = 'start')) %>% 
    # chop out rows that didn't end at 'g' (omit if you're curious)
    filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

如果df是因素，您会收到有关强制执行的警告，但它会正常运行（强制启动或添加%>% mutate_each(funs(as.character))到每个df来电，他们会离开）。列名有点难看;如果您愿意，可以使用left_join suffix参数或select或rename设置它们。

显然，连接的迭代会引发一个循环，这可能是这样的：

df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')

for(i in 0:2){
  endcol <- paste0('end', paste(rep('.y', i), collapse = ''))
  df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
}

df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

如果你设置的迭代次数太高，它就会输出错误，因为没有要连接的行，但错误实际上非常方便，因为循环已经保存了你想要的df2，所以错误只会阻止额外的工作完成。如果你愿意，可以添加tryCatch，或者转向另一个方向并将其重构为一个看起来很可怕的while循环，它实际上会迭代完美的次数：

df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')
endcol <- 'end'    # initialize iterating variable

while(TRUE){
  df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
  endcol <- paste0(endcol, '.y')
}

df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

Answer 2

以下是更短且易于理解的，使用基数R的递归函数。如果发送的data.frame的开始和结束列已经是字符而不是因子，则不需要前2行。

mainpath2 = function(ddf, startpt, endpt, route=c()){
    ddf$start = as.character(ddf$start)
    ddf$end = as.character(ddf$end)
    if(startpt == endpt) return("Error: Same Start and End points.\n")
    for(i in 1:nrow(ddf)){
        if(ddf$start[i] == startpt){
            route = append(route, startpt)
            if(ddf$end[i] == endpt){
                # PATH FOUND: 
                route = append(route, endpt)
                print(route)
            }
            else mainpath2(ddf[-i, ], ddf$end[i], endpt, route)
            route = route[-length(route)]
        }
    }
}

> mainpath2(ddf, 'a', 'g')
[1] "a" "b" "e" "f" "g"
[1] "a" "b" "f" "g"

在R中找到2个点之间的路线

2 个答案: