我有一个这样的数据框:
df <- data.frame(x=c(7,5,4),y=c(100,100,100),w=c(170,170,170),z=c(132,720,1256))
我使用mapply创建一个新列:
set.seed(123)
library(truncnorm)
df$res <- mapply(rtruncnorm,df$x,df$y,df$w,df$z,25)
所以,我得到了:
> df
#x y w z res
#1 7 100 170 132 117.9881, 126.2456, 133.7627, 135.2322, 143.5229, 100.3735, 114.8287
#2 5 100 170 720 168.8581, 169.4955, 169.6461, 169.8998, 169.0343
#3 4 100 170 1256 169.7245, 167.6744, 169.7025, 169.4441
#dput(df)
df <- structure(list(x = c(7, 5, 4), y = c(100, 100, 100), w = c(170,
170, 170), z = c(132, 720, 1256), res = list(c(117.988108836195,
126.245562762918, 133.762709785614, 135.232193379024, 143.52290514973,
100.373469134837, 114.828678702662), c(168.858147661715, 169.495493758985,
169.646123183828, 169.899849943838, 169.034333943479), c(169.724470294466,
167.674371713068, 169.70250974042, 169.444134892323))), .Names = c("x",
"y", "w", "z", "res"), row.names = c(NA, -3L), class = "data.frame")
但我真正需要的是根据df$res
结果重复每行df数据帧,如下所示:
> df2
# x y w z res
#1 7 100 170 132 117.9881
#2 7 100 170 132 126.2456
#3 7 100 170 132 133.7627
#4 7 100 170 132 135.2322
#5 7 100 170 132 143.5229
#6 7 100 170 132 100.3735
#7 7 100 170 132 114.8287
#8 5 100 170 720 168.8581
#9 5 100 170 720 169.4955
#10 5 100 170 720 169.6461
#11 5 100 170 720 169.8998
#12 5 100 170 720 169.0343
#13 4 100 170 1256 169.7245
#14 4 100 170 1256 167.6744
#15 4 100 170 1256 169.7025
#16 4 100 170 1256 169.4441
如何有效地实现这一目标?我需要将其应用于大型数据框
答案 0 :(得分:2)
df <- data.frame(x=c(7,5,4),y=c(100,100,100),w=c(170,170,170),z=c(132,720,1256))
set.seed(123)
l <- mapply(rtruncnorm,df$x,df$y,df$w,df$z,25)
cbind.data.frame(df[rep(seq_along(l), lengths(l)),],
res = unlist(l))
# x y w z res
# 1 7 100 170 132 117.9881
# 1.1 7 100 170 132 126.2456
# 1.2 7 100 170 132 133.7627
# 1.3 7 100 170 132 135.2322
# 1.4 7 100 170 132 143.5229
# 1.5 7 100 170 132 100.3735
# 1.6 7 100 170 132 114.8287
# 2 5 100 170 720 168.8581
# 2.1 5 100 170 720 169.4955
# 2.2 5 100 170 720 169.6461
# 2.3 5 100 170 720 169.8998
# 2.4 5 100 170 720 169.0343
# 3 4 100 170 1256 169.7245
# 3.1 4 100 170 1256 167.6744
# 3.2 4 100 170 1256 169.7025
# 3.3 4 100 170 1256 169.4441
答案 1 :(得分:0)
根据您给定的df
df$res <- sapply(df$res, paste0, collapse=",")
do.call(rbind, apply(df, 1, function(x) do.call(expand.grid, strsplit(x, ","))))
# x y w z res
# 1 7 100 170 132 117.988108836195
# 2 7 100 170 132 126.245562762918
# 3 7 100 170 132 133.762709785614
# 4 7 100 170 132 135.232193379024
# 5 7 100 170 132 143.52290514973
# 6 7 100 170 132 100.373469134837
# 7 7 100 170 132 114.828678702662
# 8 5 100 170 720 168.858147661715
# 9 5 100 170 720 169.495493758985
# 10 5 100 170 720 169.646123183828
# 11 5 100 170 720 169.899849943838
# 12 5 100 170 720 169.034333943479
# 13 4 100 170 1256 169.724470294466
# 14 4 100 170 1256 167.674371713068
# 15 4 100 170 1256 169.70250974042
# 16 4 100 170 1256 169.444134892323