Here's a simple taxonomy (labels and IDs):
# Sample taxonomy: one row per category ID with up to four hierarchy levels
# (cat_h1 .. cat_h4). NA marks a level that does not exist for that category.
test_data <- data.frame(
  cat_id = c(661, 197, 228, 650, 126, 912, 949, 428),
  cat_h1 = rep(c("Animals", "Plants"), times = c(5, 3)),
  cat_h2 = c(
    "Mammals", "Mammals", "Mammals", "Birds", "Birds",
    "Wheat", "Grass", "Other"
  ),
  cat_h3 = c("Dogs", "Dogs", "Other", "Hawks", "Other", NA, NA, NA),
  cat_h4 = c("Big", "Little", NA, NA, NA, NA, NA, NA)
)
The parsed structure should match the following:
# Target shape: one nested named list per hierarchy level; each leaf is the
# numeric cat_id, named by the deepest non-NA label of its row.
list(
Animals = list(Mammals = list(Dogs = list(Big = 661, Little = 197), Other = 228),
Birds = list(Hawks = 650, Other = 126)),
Plants = list(Wheat = 912, Grass = 949, Other = 428))
答案 0 :(得分:6)
如果可以接受结果中元素的顺序略有不同,下面是一个按列处理的递归解决方案:
# Recursively convert a taxonomy table into a nested named list.
#
# x: data frame whose first column holds the IDs and whose remaining columns
#    hold the hierarchy labels from the top level down (NA = level absent).
# d: working table. By default it is x with an all-NA column appended, so the
#    deepest real level always has a "next level" column that is entirely NA.
#    The recursive calls made through by() receive their subset as d.
#
# Returns the sub-lists for branches first, followed by the leaf IDs; every
# element is named by its label in column 2 of d.
f <- function(x, d=cbind(x,NA)) {
c(
# call f by branch
# Recurse only while a real next-level column exists, i.e. d has more than
# the three columns ID, current label and trailing NA column.
if(ncol(d) > 3) local({
# Keep the rows that continue to a deeper level.
x <- d[!is.na(d[[3]]),]
# Drop the current label column and split the rest by it. by() passes each
# group as the first unnamed argument; because x=NA already fills f's first
# formal, the group is matched to d and no further NA column is appended.
by( x[-2], droplevels(x[2]), f, x=NA, simplify=FALSE)
}),
# leaf nodes
# Rows whose next level is NA end here: ID as value, current label as name.
setNames(as.list(d[[1]]), d[[2]])[is.na(d[[3]])]
)
}
会给出这个:
> str(f(test_data))
List of 2
$ Animals:List of 2
..$ Birds :List of 2
.. ..$ Hawks: num 650
.. ..$ Other: num 126
..$ Mammals:List of 2
.. ..$ Dogs :List of 2
.. .. ..$ Big : num 661
.. .. ..$ Little: num 197
.. ..$ Other: num 228
$ Plants :List of 3
..$ Wheat: num 912
..$ Grass: num 949
..$ Other: num 428
答案 1 :(得分:3)
也许不是最有效的,但也不是太难:
创建数据:
# Example input: category IDs plus their labels at hierarchy levels 1-4.
# Levels that a category does not reach are left as NA.
test_data <- data.frame(
  cat_id = c(661, 197, 228, 650, 126, 912, 949, 428),
  cat_h1 = rep(c("Animals", "Plants"), times = c(5, 3)),
  cat_h2 = c(
    rep(c("Mammals", "Birds"), times = c(3, 2)),
    "Wheat", "Grass", "Other"
  ),
  cat_h3 = c("Dogs", "Dogs", "Other", "Hawks", "Other", rep(NA, 3)),
  cat_h4 = c("Big", "Little", rep(NA, 6))
)
循环遍历数据框并构建列表/树:
# Build the nested taxonomy list `tax` from `test_data`, one row at a time.
# For each row the label path (cat_h1, cat_h2, ...) is walked from the top,
# creating an empty list at every level that does not exist yet, and the
# row's cat_id is then stored at the end of the path.
tax <- list() ## initialize
for (i in seq_len(nrow(test_data))) { # seq_len(): no iterations for 0 rows
  ## convert data.frame row to character vector; drop = FALSE keeps a data
  ## frame even when there is only one label column
  taxdat <- vapply(
    test_data[i, -1, drop = FALSE], as.character, character(1)
  )
  ## depth of this row = number of non-missing labels (NAs assumed trailing)
  ntax <- sum(!is.na(taxdat))
  if (ntax == 0) {
    next ## no labels at all: there is no path to attach the ID to
  }
  for (j in seq_len(ntax)) {
    taxstr <- taxdat[seq_len(j)] ## path from the root down to level j
    if (is.null(tax[[taxstr]])) {
      tax[[taxstr]] <- list() ## initialize if necessary
    }
  }
  ## taxstr now holds the full path of the row
  tax[[taxstr]] <- test_data$cat_id[i] ## assign value to tip
}
将结果与期望值进行比较:
# Expected nested list, written out by hand.
res <- list(
  Animals = list(
    Mammals = list(
      Dogs = list(Big = 661, Little = 197),
      Other = 228
    ),
    Birds = list(Hawks = 650, Other = 126)
  ),
  Plants = list(Wheat = 912, Grass = 949, Other = 428)
)
# Compare the hand-written expectation with the list held in `tax`.
all.equal(res, tax) ## TRUE
答案 2 :(得分:1)
我会避免使用列表结构,而是改为整理数据。这是一种减少数据冗余的方法。
# Prompts for two integers, compares them, prints one integer computed from
# them and exits.
# Syscall numbers are read per the SPIM/MARS convention (assumed - confirm for
# the target simulator): 4 = print string, 5 = read integer, 1 = print
# integer, 10 = exit.
.text
# First input: print the prompt, read an integer, keep it in $s0
# (it is copied into $t1 further down)
la $a0, input
li $v0, 4
syscall
li $v0, 5
syscall
move $s0, $v0 # save to $s0
# Second Input - Saved to $t2
la $a0, input2
li $v0, 4
syscall
li $v0, 5
syscall
move $t1, $s0 # restore 1st number
move $t2, $v0
# Compare the two Inputs
# NOTE(review): when the inputs are equal neither branch is taken and
# execution falls through into Bigger - confirm this is intended.
bgt $t1, $t2, Bigger
blt $t1, $t2, Smaller
# If the 1st is greater
# ($t1 + 5) - ($t2 * 2) = result
Bigger:
add $t4, $t1, 5 # $t4 = $t1 + 5
mul $t5, $t2, 2 # $t5 = $t2 * 2
sub $t7, $t4, $t5 # $t7 = $t4 - $t5
# print the result, then exit
li $v0, 1
move $a0, $t7
syscall
li $v0, 10
syscall
# If the 1st is smaller
# ($t2 + 5) - ($t1 * 2) = result
Smaller:
add $t4, $t2, 5 # $t4 = $t2 + 5
mul $t5, $t1, 2 # $t5 = $t1 * 2
sub $t7, $t4, $t5 # $t7 = $t4 - $t5
# print the result, then exit
li $v0, 1
move $a0, $t7
syscall
li $v0, 10
syscall
.data
# Prompt strings printed before each read
input: .asciiz "Enter the First Integer: "
input2: .asciiz "Enter the Second Integer: "
# NOTE(review): Halt and its two instructions come after the .data directive,
# and nothing in the visible code branches to Halt - confirm whether this
# belongs in .text or can be removed.
Halt: li $v0, 10
syscall
原始数据很容易重新组合出来:
library(dplyr)
# Parent/child lookup tables, one per pair of adjacent hierarchy levels.
# Each table keeps the distinct (parent, child) label pairs of test_data
# whose child label is not NA.

# Level 1 -> level 2
h1_h2 <- test_data %>%
  distinct(cat_h1, cat_h2) %>%
  filter(!is.na(cat_h2))

# Level 2 -> level 3
h2_h3 <- test_data %>%
  distinct(cat_h2, cat_h3) %>%
  filter(!is.na(cat_h3))

# Level 3 -> level 4
h3_h4 <- test_data %>%
  distinct(cat_h3, cat_h4) %>%
  filter(!is.na(cat_h4))
编辑:这是一种自动化整个过程的方法。
# Rebuild the wide table by chaining the lookup tables on the label column
# that each adjacent pair shares. The keys are spelled out so the joins do
# not depend on dplyr detecting the common columns (and emit no
# "Joining with" message).
h1_h2 %>%
  left_join(h2_h3, by = "cat_h2") %>%
  left_join(h3_h4, by = "cat_h3")