我的问题是:拆分路径,取得直到每一个 "$" 出现处(occurrence)为止的所有子路径(即一种累积的子路径),并为每个子路径生成一个新变量。
一步一步手动操作时,我能得到所需的输出:
library(stringr)

# Example input: "/"-separated paths in which every "$" closes one sub-path.
data <- data.frame(
  path = c("A/A/$/B/$/A/$", "B/C/$", "B/C/$/C/$/A/B/$"),
  stringsAsFactors = FALSE
)

# Number of "$" markers found in each path.
data$tr <- str_count(data$path, "\\$")

# Split every path at the literal "$" once, instead of repeating the same
# strsplit() call for every column. Each piece keeps its trailing "/".
pieces <- strsplit(data$path, "$", fixed = TRUE)

# Return the k-th piece of every path with its last character (the trailing
# "/") removed; NA for paths that have fewer than k pieces.
piece_at <- function(k) {
  piece <- vapply(pieces, `[`, character(1), k)
  substr(piece, 1, nchar(piece) - 1)
}

# tr_1 is the first sub-path; every later column appends the next piece to
# the previous column, or is "" when the path has no such piece.
data$tr_1 <- piece_at(1)

seg_2 <- piece_at(2)
data$tr_2 <- ifelse(is.na(seg_2), "", paste0(data$tr_1, seg_2))

seg_3 <- piece_at(3)
data$tr_3 <- ifelse(is.na(seg_3), "", paste0(data$tr_2, seg_3))
我参照 "Creating new named variable in dataframe using loop and naming convention" 这一问题,尝试在循环中执行相同的操作,但得到的输出不正确。
# Start again from the hand-built columns only, selected by name so the step
# does not depend on column positions.
data <- data[, c("path", "tr", "tr_1")]

# Split every path at the literal "$" once; the result does not change
# between iterations. Each piece keeps its trailing "/".
pieces <- strsplit(data$path, "$", fixed = TRUE)

# Build tr_2, tr_3, ... up to the largest "$" count. seq_len(n)[-1] is empty
# when n <= 1, whereas 2:n would count backwards (2, 1).
for (i in seq_len(max(data$tr))[-1]) {
  piece <- vapply(pieces, `[`, character(1), i)
  # The previous column must be looked up with [[ and a computed name:
  # `data$tr_i-1` parses as `data$tr_i - 1`, which is NULL - 1 here because
  # there is no column called tr_i.
  previous <- data[[paste0("tr_", i - 1L)]]
  data[[paste0("tr_", i)]] <- ifelse(
    is.na(piece),
    "",
    # Drop the piece's trailing "/" before appending it to the previous value.
    paste0(previous, substr(piece, 1, nchar(piece) - 1))
  )
}
还有另一种递归方式吗? (每个新变量使用前一个变量) 提前谢谢!
答案 0 :(得分:1)
我会这样做:
data <- data.frame(
  path = c("A/A/$/B/$/A/$", "B/C/$", "B/C/$/C/$/A/B/$"),
  stringsAsFactors = FALSE
)

# Cut every path at the literal "/$" markers (fixed-string split, as
# suggested by David).
segments <- strsplit(data$path, "/$", fixed = TRUE)
data$tr <- lengths(segments)
n_levels <- max(data$tr)

# For each path, paste the segments together cumulatively and pad the result
# with NA so that every path yields n_levels values.
prefixes <- lapply(segments, function(seg) {
  cumulative <- Reduce(paste0, seg, accumulate = TRUE)
  length(cumulative) <- n_levels
  cumulative
})

# Stack the per-path vectors into rows and name the columns tr_1 ... tr_n.
prefix_cols <- as.data.frame(do.call(rbind, prefixes), stringsAsFactors = FALSE)
names(prefix_cols) <- paste0("tr_", seq_len(n_levels))

data <- cbind(data, prefix_cols)
#              path tr tr_1  tr_2      tr_3
# 1   A/A/$/B/$/A/$  3  A/A A/A/B   A/A/B/A
# 2           B/C/$  1  B/C  <NA>      <NA>
# 3 B/C/$/C/$/A/B/$  3  B/C B/C/C B/C/C/A/B
如果必须,可以在另一个 lapply 循环中用空字符串替换 NA 值。