将函数应用于每行的元素,然后进行汇总

时间:2016-08-23 22:50:39

标签: r

我有一个关于在data.frame中操作数据的问题。

基本上我有一个大型数据集 - 下面的缩写版本:

structure(list(nm_mean = c(194213914.326, 194213914.326, 194213914.326, 
194213914.326, 194213914.326, 217947112.739), nm_se = c(9984735.05918367, 
9984735.05918367, 9984735.05918367, 9984735.05918367, 9984735.05918367, 
11010386.0760204), alpha = c(193.197697846336, 214.592588477741, 
240.246557258741, 258.116959355425, 282.560024775668, 306.610038660465
), beta = c(61526.2664158025, 57950.9563448233, 56085.1512614369, 
52919.4794239927, 51483.4591654126, 50405.8186695088)), .Names = c("nm_mean", 
"nm_se", "alpha", "beta"), row.names = c(NA, 6L), class = "data.frame")

我想使用rbeta来生成使用beta分布和alpha和beta作为参数的概率

类似地,我想使用rnorm使用nm_mean和nm_se作为均值和sd的正态分布生成随机数。

然后我想将rnorm值生成的rbeta值相乘,并将第50,25和75th分位数提取回数据帧

所以作为第1行的例子

x <- rbeta(1000,193.1977,61526.27)
y <- rnorm(1000,194213914,9984735)
z <- x*y

dat$ce <- quantile(z,0.5)
dat$ll <- quantile(z,0.25)
dat$ul <- quantile(z,0.975)

本质上我得到了一个ce,ll和ul作为rbeta和rnorm的产品附加回数据库。

2 个答案:

答案 0 :(得分:6)

受@ HackR代码的启发,我认为它是一个功能矢量化版本:

set.seed(42)
n <- 1000
nrows <- nrow(dat)
rn <- matrix(rnorm(nrows * n, dat$nm_mean, dat$nm_se), ncol = nrows, byrow = TRUE)
rb <- matrix(rbeta(nrows * n, shape1 = dat$alpha, shape2 = dat$beta),
             ncol = nrows, byrow = TRUE)
cbind(dat,
      structure(t(apply(rn * rb, 2, function(z) quantile(z, c(0.5, 0.25, 0.975)))),
                .Dimnames = list(NULL, c("ce", "ll", "ul"))))
#     nm_mean    nm_se    alpha     beta        ce        ll        ul
# 1 194213914  9984735 193.1977 61526.27  608455.3  570100.5  710373.6
# 2 194213914  9984735 214.5926 57950.96  715305.0  677754.3  856570.7
# 3 194213914  9984735 240.2466 56085.15  825143.7  778351.2  979361.1
# 4 194213914  9984735 258.1170 52919.48  943261.4  895832.6 1091899.3
# 5 194213914  9984735 282.5600 51483.46 1054514.3  995640.8 1226176.4
# 6 217947113 11010386 306.6100 50405.82 1312325.0 1247030.8 1515630.5

答案 1 :(得分:1)

这是基于我与@thelatemail对话的矢量化解决方案:

n   <- 1000
grp <- nrow(dat)
z   <- with(dat, rnorm(grp*n, nm_mean, nm_se) * rbeta(grp*n, alpha, beta) )
m   <- 1

for(i in 1:nrow(dat)){
  dat$ce[i] <- quantile(z[m:(i*1000)],0.5)
  dat$ll[i] <- quantile(z[m:(i*1000)],0.25)
  dat$ul[i] <- quantile(z[m:(i*1000)],0.975)
  m <- m + 1000
}

较少的矢量化解决方案是:

for(i in 1:nrow(dat)){
  x <- rbeta(1000, shape1 = dat$alpha[i], shape2 = dat$beta[i])
  y <- rnorm(n=1000,dat$nm_mean[i],dat$nm_se[i])
  z <- x*y

  dat$ce[i] <- quantile(z,0.5)
  dat$ll[i] <- quantile(z,0.25)
  dat$ul[i] <- quantile(z,0.975)
}

dat
    nm_mean    nm_se    alpha     beta        ce        ll        ul
1 194213914  9984735 193.1977 61526.27  607563.9  573229.9  713057.2
2 194213914  9984735 214.5926 57950.96  712268.5  674826.3  836950.8
3 194213914  9984735 240.2466 56085.15  823322.9  777482.8  981156.7
4 194213914  9984735 258.1170 52919.48  937331.2  884945.0 1095876.3
5 194213914  9984735 282.5600 51483.46 1059980.4 1003596.4 1225615.6
6 217947113 11010386 306.6100 50405.82 1316733.1 1250190.1 1515185.0