我有以下数据:
>Data
# X Y
# 1 123 234 345 456
# 2 222 333 444 555 666
我希望得到以下结果:
>Data
# X Y
# 1 123
# 1 234
# 1 345
# 1 456
# 2 222
# 2 333
# 2 444
# 2 555
# 2 666
请帮忙!
答案 0 :(得分:2)
这个问题与你之前的提问(your earlier question)相比其实没有多少新内容。请先自己做一些功课,或者至少展示一下你到目前为止尝试过的东西!
# Split the space-separated strings in Data$Y into one column per field and
# bind them to X. read.table() looks only at the first five lines to decide how
# many columns to create, so the widest row is measured up front and passed via
# col.names; otherwise a wider row further down would be wrapped onto extra
# rows and no longer line up with X.
y_con <- textConnection(as.character(Data$Y))
n_fields <- max(count.fields(y_con, sep = ""), na.rm = TRUE)
close(y_con)
Data1 <- cbind(X = Data$X,
               read.table(text = as.character(Data$Y),
                          fill = TRUE, header = FALSE,
                          col.names = paste0("V", seq_len(n_fields))))
# Stack the V* columns into long format; rows with fewer fields than the
# widest one contribute NA padding.
data.frame(X = Data1[, 1], stack(Data1[-1]))
# X values ind
# 1 1 123 V1
# 2 2 222 V1
# 3 1 234 V2
# 4 2 333 V2
# 5 1 345 V3
# 6 2 444 V3
# 7 1 456 V4
# 8 2 555 V4
# 9 1 NA V5
# 10 2 666 V5
答案 1 :(得分:2)
如果你的 Y 列是一个字符向量,那么可以这样做:
# Split each Y string on single spaces, name each piece by its X, and stack
# the named list into a long data frame with columns `values` and `ind`.
out <- stack(setNames(strsplit(df$Y, " "), df$X))
# strsplit() yields character tokens, so convert them to numeric.
out$values <- as.numeric(out$values)
在一个 2e4(20000)行的 data.frame 上对所有答案进行基准测试:
# Sample data from the question. stringsAsFactors = TRUE is spelled out so Y is
# a factor regardless of the R version's default (FALSE since R 4.0.0), which
# matches the column classes reported for this benchmark.
df <- read.table(header = TRUE, stringsAsFactors = TRUE, text = "X Y
1 '123 234 345 456'
2 '222 333 444 555 666'")
# thanks to @MatthewPlourde for the suggestion to use replicate
# Repeat the two-row sample 10000 times and row-bind the copies.
df <- do.call(rbind, replicate(10000, df, simplify = FALSE))
# Confirm the size of the benchmark data.
dim(df)
# [1] 20000 2
# Show the class of each column.
sapply(df, class)
# X Y
# "integer" "factor"
# Arun's function
#
# Split every Y entry on single spaces, label the pieces with the matching X,
# and stack them into a long data frame with columns `values` (numeric) and
# `ind` (the X label).
Arun <- function(df) {
  tokens <- strsplit(as.character(df$Y), " ")
  names(tokens) <- df$X
  long <- stack(tokens)
  # The split pieces are character strings; convert them to numbers.
  long$values <- as.numeric(long$values)
  long
}
# Ananda's function
#
# Reshape a data frame with an id column `X` and a column `Y` of
# whitespace-separated values into long format.
#
# Data: data frame with columns `X` and `Y`; `Y` is passed through
#   as.character() before parsing.
# Returns a data frame with columns `X`, `values` and `ind` (`V1`, `V2`, ...).
#   Rows are ordered field by field, and rows of `Y` with fewer fields than
#   the widest one contribute NA-padded entries.
Ananda <- function(Data) {
  y_text <- as.character(Data$Y)
  # read.table() decides the number of columns from the first five lines only,
  # so a wider row further down would be wrapped onto extra rows and break the
  # cbind() with X. Measure the widest row first and pass it via col.names.
  y_con <- textConnection(y_text)
  on.exit(close(y_con), add = TRUE)
  n_fields <- max(count.fields(y_con, sep = ""), na.rm = TRUE)
  Data1 <- cbind(X = Data$X,
                 read.table(text = y_text, fill = TRUE, header = FALSE,
                            col.names = paste0("V", seq_len(n_fields))))
  # X is recycled across the stacked V* columns.
  data.frame(X = Data1[, 1], stack(Data1[-1]))
}
# Matthew's solution
#
# Parse the text in d$Y separately for each value of d$X and stack the
# per-group numbers into a long data frame.
Matthew <- function(d) {
  # Read one group's text as character tokens, then convert them to numeric.
  to_numbers <- function(x) {
    tokens <- scan(text = as.character(x), what = '', quiet = TRUE)
    as.numeric(tokens)
  }
  grouped <- by(d$Y, d$X, to_numbers)
  stack(grouped)
}
# library() stops with an error if microbenchmark is not installed, whereas
# require() would only return FALSE and let the next call fail obscurely.
library(microbenchmark)
# Time each approach five times on the same data, keeping each result.
microbenchmark(a1 <- Arun(df), a2 <- Ananda(df), a3 <- Matthew(df), times = 5)
# Unit: milliseconds
# expr min lq median uq max neval
# a1 <- Arun(df) 235.6945 258.8485 264.4166 329.2974 392.9559 5
# a2 <- Ananda(df) 6661.8461 6972.2823 7825.3701 8210.9970 9454.5762 5
# a3 <- Matthew(df) 3589.1784 3691.3826 3787.4163 4020.4895 5034.6580 5
答案 2 :(得分:2)
这里已经有很好的答案了,下面是另一种使用 by 和 scan 的方法。
# Group Y by X, parse each group's text into numbers with scan(), and stack
# the per-group results into a long data frame.
# what='' makes scan() read character tokens; quiet=TRUE silences its
# "Read N items" message.
stack(by(d$Y, d$X,
function(x) as.numeric(scan(text=as.character(x), what='', quiet=TRUE))))