我有一个由节点对组成的 data.table,
其中每一行的父节点(Parent)在树中位于子节点(Child)的上一层。
我需要从这些规则中提取所有单独的链。例如,如果规则的格式为 parent>child:
(a>b, b>c, b>e, c>d),
则链为 (a>b>c>d, a>b>e)。
我已经用一些虚拟数据作为例子来说明我想做什么。关于如何实现这一点,有什么建议吗?感觉应该很简单,但我一直想不出该从哪里入手。谢谢:)
答案 0 :(得分:1)
这是另一种可能的解决方案-虽然也有些混乱
输出
# Example call on the sample `input` table; the result posted with the answer
# is reproduced below. Each row is one element of one chain: `tree_nums`
# numbers the chain, `elems` is the node, and `hierarchy` is the node's depth.
output(input)
#     tree_nums elems hierarchy
#  1:         1     a         1
#  2:         1     b         2
#  3:         1     c         3
#  4:         1     d         4
#  5:         2     e         1
#  6:         2     b         2
#  7:         2     c         3
#  8:         2     d         4
#  9:         3     a         1
# 10:         3     b         2
# 11:         3     f         3
# 12:         4     e         1
# 13:         4     b         2
# 14:         4     f         3
#
函数
#' Enumerate every root-to-leaf chain described by a Parent/Child edge table
#'
#' The hierarchy is walked one generation at a time, starting from the nodes
#' that never appear as a child. Each partial chain is kept as a concatenated
#' string (e.g. "abc") and extended with the matching children of its last
#' node until every node has been assigned a generation.
#'
#' Known limitations of this representation:
#' * node names must be single characters, because chains are split back into
#'   nodes character by character;
#' * every node is assumed to sit at a single depth (the `hierarchy` column is
#'   looked up per node, not per position within a chain).
#'
#' @param input A data.frame or data.table with exactly the columns `Parent`
#'   and `Child` (in that order), one row per edge.
#' @return A data.table with one row per chain element and the columns
#'   `tree_nums` (chain id), `elems` (node name) and `hierarchy` (generation
#'   of the node, roots being 1).
output <- function(input) {
  # Every edge as a "<parent><child>" key; used to keep only real edges out of
  # the parent x child cross product built in each generation.
  helper <- do.call(paste0, input)
  elements <- unique(unlist(input))
  res <- integer(length(elements))
  ind <- elements %in% input$Child

  # First generation: nodes that are never a child.
  parents <- elements[!ind]
  res[!ind] <- 1L

  # Later generations.
  val <- 1L
  trees <- setNames(as.list(seq_along(parents)), parents)
  while (any(res == 0L)) {
    val <- val + 1L
    children <- unique(input$Child[input$Parent %in% parents])
    if (length(children) == 0L) {
      # Nothing left to extend, yet some nodes were never reached: without
      # this guard the loop would spin forever.
      stop(
        "some nodes cannot be reached from a root node; ",
        "does `input` contain a cycle?",
        call. = FALSE
      )
    }
    res[elements %in% children] <- val

    # Candidate (parent, child) pairs, restricted to edges present in `input`.
    nextHelper <- expand.grid(parents, children)
    nextHelper$conc <- do.call(paste0, nextHelper)
    nextHelper <- nextHelper[nextHelper$conc %in% helper, ]

    # Split the chains back into one column per generation. Chains that ended
    # in an earlier generation are shorter, so pad them with "" to a common
    # width; otherwise rbind() recycles the short vectors and corrupts them.
    parts <- strsplit(names(trees), "")
    width <- max(lengths(parts))
    parts <- lapply(parts, function(p) c(p, rep("", width - length(p))))
    df_1 <- do.call(rbind, parts)

    # Extend each chain by joining its last node against the new edges;
    # all.x = TRUE keeps chains that have no further children.
    df_2 <- base::merge(
      df_1, nextHelper[, -3L],
      by.x = ncol(df_1), by.y = "Var1", all.x = TRUE
    )
    n1 <- ncol(df_2)
    # merge() moves the key column to the front; restore generation order.
    if (n1 > 2L) df_2 <- df_2[, c(2:(n1 - 1), 1L, n1)]
    df_2$Var2 <- as.character(df_2$Var2)
    df_2$Var2[is.na(df_2$Var2)] <- ""
    treeNames <- do.call(paste0, df_2)
    trees <- setNames(as.list(seq_along(treeNames)), treeNames)
    parents <- children
  }

  # Long format: one row per (chain, node).
  elems <- strsplit(names(trees), "")
  tree_nums <- rep(as.integer(trees), lengths(elems))
  elems <- unlist(elems)
  result <- data.table::data.table(tree_nums, elems)
  out <- data.table::data.table(elements, res)
  result$hierarchy <- out$res[match(result$elems, out$elements)]
  result
}
答案 1 :(得分:1)
经过一番折腾之后,我得到了一个解决方案;不过如果有更高效的方案,我很乐意采用。
# Build every root-to-leaf chain from the Parent/Child edge table `input`
# by repeatedly appending children to the chains' last nodes, then reshape
# the chains into a long table with one row per (Tree, node).
# `input` is expected to be a data.table with columns Parent and Child.
library(data.table) # setkey(), J(), CJ() and data.table() are used below
library(stringi)

# convert to string
setkey(input, Parent)
sep <- ">>"
split_regex <- "(?<=%1$s)[^(%1$s)]*$"
# Matches the last node of a chain, i.e. the text after the final separator.
leaf_regex <- sprintf(split_regex, sep)
trees <- sprintf("%s%s%s", input$Parent, sep, input$Child)

# get the base nodes, the children
children <- stri_extract_first_regex(trees, leaf_regex)

# find those which are parents themselves (keyed join on Parent; leaves that
# are not a parent come back with an NA Child and are dropped)
grid <- input[J(unique(children)), ][!is.na(Child), ]
update <- unique(grid$Parent)
N <- nrow(grid)

while (N > 0) {
  # add the children on for the ones at the base of the chains, might mean
  # making more tree splits
  all_trees <- unique(unlist(lapply(update, function(x) {
    pos <- children == x
    y <- grid[Parent %in% x, Child]
    # Name the CJ() columns explicitly: newer data.table versions auto-name
    # unnamed inputs, so relying on default V1/V2 names is not portable.
    c(
      trees[!pos],
      CJ(V1 = trees[pos], V2 = y)[, sprintf("%s%s%s", V1, sep, V2)]
    )
  })))

  # Some chains are now embedded in longer ones, so remove those.
  trim <- vapply(seq_along(all_trees), function(i) {
    any(stri_detect_fixed(all_trees[-i], all_trees[i]))
  }, logical(1))
  trees <- all_trees[!trim]

  # update operations on expanded trees until no children remain with a
  # dependency
  children <- stri_extract_first_regex(trees, leaf_regex)
  grid <- input[J(unique(children)), ][!is.na(Child), ]
  update <- unique(grid$Parent)
  N <- nrow(grid)
}

# re-structure to appropriate format
output <- data.table(pattern = trees)
output[, Tree := seq_len(.N)]
output[, split := stri_split_regex(pattern, sep)]
output <- output[, .(List = split[[1]],
                     Hierarchy = seq_along(split[[1]])), by = Tree]
output[]