我想使用存储在数据框中的值生成合成数据集。在新的数据框中,我需要为每个物种从具有指定均值的对数正态分布中随机抽取 n 行,所以我尝试了这个:
# Per-species parameter table, parsed from inline text: one row per species
# with the CE and Ph means (used below as `meanlog`) and the number of rows
# `n` to simulate.
sp <- "
species CE_mean Ph_mean n
Apocal 0.6398000 6.233600 200
Aporos 0.6334615 6.518269 156
Apotra 0.8448980 6.561224 49
"
msp <- read.table(header = TRUE, text = sp)
# Attempt to build the synthetic data set one species at a time.
# NOTE: this is the version that does not work (see the text below).
spdf <- data.frame()
for( i in 1:nrow(msp))
{
# spm1 starts as a data frame with zero rows and zero columns ...
spm1 <- data.frame()
# ... so assigning a column of msp$n[i] values with `$<-` fails here:
# the replacement has msp$n[i] rows while spm1 has 0.
spm1$CE <-rlnorm(n=msp$n[i],meanlog=msp$CE_mean[i],sdlog=0.1)
spm1$Ph <-rlnorm(n=msp$n[i],meanlog=msp$Ph_mean[i],sdlog=0.1)
spm1$species <- msp$species[i]
# Intended to append this species' rows to the accumulated result.
spdf<-rbind(spdf,spm1)
}
但它不起作用,我想知道如何使用dplyr进行此操作。
答案 0 :(得分:2)
我不确定 dplyr 是否是最好的方法。您可以通过以下方式修复代码:
# Simulate msp$n[i] log-normal draws of CE and Ph for every row of msp and
# stack them into one data frame with columns CE, Ph and species.
#
# Each species' rows are built as a complete data frame (all columns at
# once), stored in a preallocated list, and bound together a single time at
# the end instead of growing spdf with rbind() on every iteration.
pieces <- vector("list", nrow(msp))
# seq_len() gives an empty sequence when msp has no rows, whereas
# 1:nrow(msp) would iterate over c(1, 0).
for (i in seq_len(nrow(msp))) {
  CE <- rlnorm(n = msp$n[i], meanlog = msp$CE_mean[i], sdlog = 0.1)
  Ph <- rlnorm(n = msp$n[i], meanlog = msp$Ph_mean[i], sdlog = 0.1)
  species <- msp$species[i]
  pieces[[i]] <- data.frame(CE = CE, Ph = Ph, species = species)
}
# The leading empty data frame keeps spdf a data frame even when msp is empty.
spdf <- do.call(rbind, c(list(data.frame()), pieces))
或:
# Same simulation without an explicit loop: lapply() builds one data frame
# per row of msp (msp$n[i] log-normal draws of CE and Ph plus the species
# label) and do.call(rbind, ...) stacks them in a single call.
# seq_len() is used so that an msp with no rows produces no iterations;
# 1:nrow(msp) would iterate over c(1, 0) in that case.
spdf <- do.call(
  rbind,
  lapply(seq_len(nrow(msp)), function(i) {
    data.frame(
      CE = rlnorm(n = msp$n[i], meanlog = msp$CE_mean[i], sdlog = 0.1),
      Ph = rlnorm(n = msp$n[i], meanlog = msp$Ph_mean[i], sdlog = 0.1),
      species = msp$species[i]
    )
  })
)
使用 set.seed(123),我得到:
# NOTE: a seed only affects random draws made after it is set, so
# set.seed(123) has to be run before the code that generates spdf for the
# values printed below to be reproduced.
set.seed(123)
# Print the simulated data frame.
spdf
## CE Ph species
##1 1.792753 634.9086 Apocal
##2 1.852956 581.0526 Apocal
##3 2.215927 496.2528 Apocal
##4 1.909518 538.0327 Apocal
##5 1.920775 488.9039 Apocal
## ...
##195 1.663161 481.1812 Apocal
##196 2.315258 592.2863 Apocal
##197 2.013493 471.6256 Apocal
##198 1.673091 554.5590 Apocal
##199 1.783688 449.2285 Apocal
##200 1.684135 491.8362 Apocal
##201 1.870313 673.9387 Aporos
##202 1.676312 642.6347 Aporos
##203 1.768243 664.1729 Aporos
##204 1.878695 636.0716 Aporos
##205 2.014822 623.2107 Aporos
## ...
##352 1.742361 618.8405 Aporos
##353 2.105457 692.9110 Aporos
##354 1.931784 730.0238 Aporos
##355 2.222545 753.2359 Aporos
##356 1.628345 663.1387 Aporos
##357 2.306046 752.1002 Apotra
##358 2.307643 752.1086 Apotra
##359 2.688663 597.0578 Apotra
##360 2.604928 733.6985 Apotra
##361 2.530301 778.9991 Apotra
## ...
##401 2.575855 717.4006 Apotra
##402 2.281315 701.8091 Apotra
##403 1.898625 877.7533 Apotra
##404 2.282586 726.9484 Apotra
##405 2.456843 696.0313 Apotra
答案 1 :(得分:1)
这是一个 dplyr 解决方案:
# dplyr provides %>%, rowwise(), do() and ungroup(); load it so the snippet
# runs in a fresh session.
library(dplyr)

# Process msp one row at a time: for each species, do() returns a data frame
# with .$n log-normal draws of CE and Ph and the species label repeated on
# every row; the per-row results are stacked and the row-wise grouping is
# dropped with ungroup().
# NOTE: do() is superseded in recent dplyr releases; on dplyr >= 1.1 the
# same result can be expressed with reframe().
spdf <- msp %>%
  rowwise() %>%
  do(data.frame(
    species = .$species,
    CE = rlnorm(n = .$n, meanlog = .$CE_mean, sdlog = 0.1),
    Ph = rlnorm(n = .$n, meanlog = .$Ph_mean, sdlog = 0.1),
    stringsAsFactors = FALSE
  )) %>%
  ungroup()
应该得到:
species CE Ph
* <fctr> <dbl> <dbl>
1 Apocal 2.168593 538.4061
2 Apocal 1.868780 535.1687
3 Apocal 1.993015 503.7631
4 Apocal 1.764942 495.0502
5 Apocal 1.671921 503.3961
6 Apocal 2.013073 464.7946
7 Apocal 2.190407 538.6861
8 Apocal 1.668348 479.1846
9 Apocal 2.018912 443.7977
10 Apocal 1.802224 635.2461
# ... with 395 more rows
我希望这会有所帮助。