在R中找到2个点之间的路线

时间:2016-07-02 16:19:40

标签: r

通过以下起点和终点数据,我们如何获得2点之间的路线。

> ddf
  start end
1     a   b
2     a   c
3     b   e
4     b   f
5     b   c
6     a   d
7     e   f
8     f   g

> dput(ddf)
structure(list(start = structure(c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 
4L), .Label = c("a", "b", "e", "f"), class = "factor"), end = structure(c(1L, 
2L, 4L, 5L, 2L, 3L, 5L, 6L), .Label = c("b", "c", "d", "e", "f", 
"g"), class = "factor")), .Names = c("start", "end"), class = "data.frame", row.names = c(NA, 
-8L))
> 

此页面(http://www.anselm.edu/homepage/mmalita/culpro/graf1.html)仅显示Prolog中的2行解决方案!以下代码有效,但没有给出正确的输出列表。它可以用mainpath(ddf,' a' f')开始,找到' a'之间的路径。并且' f'。

mainpath = function(ddf, startpt, endpt){
    ddf$start = as.character(ddf$start)
    ddf$end = as.character(ddf$end)
    nodenames = sort(unique(c(ddf$start, ddf$end)))
    routev=c(startpt)
    if(is_arc(ddf, startpt, endpt, routev)) {return("Direct route found."); }
    else{
        nodenames = nodenames[which(nodenames!=startpt)]
        nodenames = nodenames[which(nodenames!=endpt  )]
        if(is_path(ddf, nodenames, startpt, endpt, routev)) 
            return ("Completed successfully")
        else return ("Could not find a path.")
    }
}

is_arc = function(ddf, frompt, topt, routevector){
    len = nrow(ddf)
    for(i in 1:len)     
        if(frompt == ddf$start[i] && topt == ddf$end[i]) {
            routevector = append(routevector, frompt)
            routevector = append(routevector, topt)
            print(routevector)
            return (TRUE); 
        }
    return (FALSE)
}

is_path = function(ddf, othernodes, frompt, topt, routevector){

    if(is_arc(ddf, frompt, topt, routevector)){
        return (TRUE)
    } 
    if(length(othernodes)==0){
        print(routevector)
        return (FALSE)
    }
    for(i in 1:length(othernodes)){
        intermediate = othernodes[i]
        if(is_arc(ddf, frompt, intermediate, routevector) && is_path(ddf, othernodes, intermediate, topt, routevector)){
            return (TRUE)
        }
    }
    print(routevector)
    return (FALSE)
}

我确信它可以得到很大的改进,特别是所有这些for循环等都可以使用apply etc函数来删除。我知道有这些功能的软件包可用,但如何在基础R中完成? 您的回答/意见将不胜感激。

2 个答案:

答案 0 :(得分:1)

虽然我确定使用线性代数有很好的方法可以做到这一点,但这是一个相对直观的方法(在这里使用dplyr,但是你喜欢翻译):

library(dplyr)

# convert factors to characters, filter down to possible starting points
df %>% mutate_each(funs(as.character)) %>% filter(start == 'a') %>% 
    # join to add possible next steps, indexing endpoints to startpoints
    left_join(df, by = c('end' = 'start')) %>%
    # iterate for successive steps
    left_join(df, by = c('end.y' = 'start')) %>%
    left_join(df, by = c('end.y.y' = 'start')) %>% 
    # chop out rows that didn't end at 'g' (omit if you're curious)
    filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

如果df是因素,您会收到有关强制执行的警告,但它会正常运行(强制启动或添加%>% mutate_each(funs(as.character))到每个df来电,他们会离开)。列名有点难看;如果您愿意,可以使用left_join suffix参数或selectrename设置它们。

显然,连接的迭代会引发一个循环,这可能是这样的:

df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')

for(i in 0:2){
  endcol <- paste0('end', paste(rep('.y', i), collapse = ''))
  df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
}

df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

如果你设置的迭代次数太高,它就会输出错误,因为没有要连接的行,但错误实际上非常方便,因为循环已经保存了你想要的df2,所以错误只会阻止额外的工作完成。如果你愿意,可以添加tryCatch,或者转向另一个方向并将其重构为一个看起来很可怕的while循环,它实际上会迭代完美的次数:

df2 <- df %>% mutate_each(funs(as.character)) %>% filter(start == 'a')
endcol <- 'end'    # initialize iterating variable

while(TRUE){
  df2 <- df2 %>% left_join(df, by = setNames('start', endcol))
  endcol <- paste0(endcol, '.y')
}

df2 %>% filter(apply(., 1, function(x){x[length(na.omit(x))]}) == 'g')

#   start end.x end.y end.y.y  end
# 1     a     b     e       f    g
# 2     a     b     f       g <NA>

答案 1 :(得分:0)

以下是更短且易于理解的,使用基数R的递归函数。如果发送的data.frame的开始和结束列已经是字符而不是因子,则不需要前2行。

mainpath2 = function(ddf, startpt, endpt, route=c()){
    ddf$start = as.character(ddf$start)
    ddf$end = as.character(ddf$end)
    if(startpt == endpt) return("Error: Same Start and End points.\n")
    for(i in 1:nrow(ddf)){
        if(ddf$start[i] == startpt){
            route = append(route, startpt)
            if(ddf$end[i] == endpt){
                # PATH FOUND: 
                route = append(route, endpt)
                print(route)
            }
            else mainpath2(ddf[-i, ], ddf$end[i], endpt, route)
            route = route[-length(route)]
        }
    }
}

> mainpath2(ddf, 'a', 'g')
[1] "a" "b" "e" "f" "g"
[1] "a" "b" "f" "g"