如何修改代码以包含循环?

时间:2019-08-18 15:36:00

标签: r loops

我正在尝试创建一个函数,用来检验服从不同分布的变量会如何影响OLS结果。我已经创建了两个因变量(y1和y2),但想把它扩展到五个左右。我想把代码改成使用循环的形式,这样就不必多次复制和粘贴这段代码,但一直没有成功。任何建议我都将不胜感激。

library(psych)
library(arm)
library(plyr)
library(fBasics)

# Run one replication of the OLS simulation.
#
# Draws one normal error vector per entry of `sds` and a single predictor x
# (mean 0, sd 0.5), builds y_k = 0.3 * x + e_k for every error vector and
# regresses each y_k on x.
#
# Args:
#   iter: replication index supplied by the caller (e.g. ldply(1:iter, ...));
#         it is not used in the computation.
#   n:    number of observations per draw.
#   sds:  standard deviations of the error terms, one dependent variable per
#         entry. The default c(1, 5) reproduces the original y1 / y2 setup.
#
# Returns:
#   A named numeric vector holding, for each dependent variable k, the slope
#   estimate (b<k>), its standard error (se<k>), the standard deviation of y
#   (sdy<k>) and the skewness of y (skewy<k>), in that order.
regsim <- function(iter, n, sds = c(1, 5)) {
  stopifnot(is.numeric(sds), length(sds) > 0)

  # Errors are drawn first (in the order of `sds`) and x afterwards, so a
  # default call consumes the random number stream in the same order as the
  # hand-written two-variable version did.
  errs <- lapply(sds, function(s) rnorm(n, 0, s))
  x <- rnorm(n, 0, .5)

  per_dv <- lapply(seq_along(sds), function(k) {
    y <- .3 * x + errs[[k]]
    fit <- lm(y ~ x)
    # Element 2 of the coefficient vectors is the slope on x.
    stats <- c(coef(fit)[2], se.coef(fit)[2], stdev(y), skew(y))
    names(stats) <- paste0(c("b", "se", "sdy", "skewy"), k)
    stats
  })

  unlist(per_dv)
}


# Simulation settings: observations per draw and number of replications.
n <- 500
iter <- 1000

# ldply() calls regsim once per replication index, passing n as the second
# argument, and collects the returned named vectors into a data frame.
sims <- ldply(seq_len(iter), regsim, n)
sims$n <- n

# Append this batch to the (initially empty) accumulator.
results <- NULL
results <- rbind(results, sims)

3 个答案:

答案 0 :(得分:0)

这样如何?

# Sample size and one predictor draw, created once outside fun_reg.
n <- 1000
x <- rnorm(n, mean = 0, sd = .5)

# Fit y = 0.3 * x + e by OLS for one normal error distribution and summarise
# the result.
#
# Args:
#   n:     number of error draws (presumably equal to length(x)).
#   ek_mu: mean of the error term.
#   ek_sd: standard deviation of the error term.
#   x:     numeric predictor vector.
#
# Returns a list with the slope estimate (bhat), its standard error (sehat),
# and the skewness (skewy) and standard deviation (stdevy) of y.
fun_reg <- function(n, ek_mu, ek_sd, x) {
  noise <- rnorm(n, ek_mu, ek_sd)
  y <- .3 * x + noise
  fit <- lm(y ~ x)

  # Element 2 of the coefficient vectors is the slope on x.
  list(
    bhat = coef(fit)[2],
    sehat = arm::se.coef(fit)[2],
    skewy = psych::skew(y),
    stdevy = fBasics::stdev(y)
  )
}

# One regression per error standard deviation; map_dfr() binds the returned
# lists row-wise into a single data frame.
purrr::map_dfr(
  c(1, 5, 10, 20, 50),
  function(ek_sd) fun_reg(n, 0, ek_sd, x)
)

编辑:

现在每次抽样包含500个观测值,并且对每个标准差取值重复抽样1000次来进行回归。变量ek_sd已添加到最终输出中,用于标明每一行结果所对应的误差项标准差。请注意,x不会在每次迭代时重新抽取,但我不确定这是否是您想要的。如果要在每次迭代中重新抽取x,请将其移入函数内。

# 500 observations per draw; x is drawn a single time, outside fun_reg.
n <- 500
x <- rnorm(n, mean = 0, sd = .5)

# Fit y = 0.3 * x + e by OLS for one normal error distribution and summarise
# the result, recording which error standard deviation was used.
#
# Args:
#   n:     number of error draws (presumably equal to length(x)).
#   ek_mu: mean of the error term.
#   ek_sd: standard deviation of the error term.
#   x:     numeric predictor vector.
#
# Returns a list with the error SD used (ek_sd), the slope estimate (bhat),
# its standard error (sehat), and the skewness (skewy) and standard deviation
# (stdevy) of y.
fun_reg <- function(n, ek_mu, ek_sd, x) {
  noise <- rnorm(n, ek_mu, ek_sd)
  y <- .3 * x + noise
  fit <- lm(y ~ x)

  # Element 2 of the coefficient vectors is the slope on x.
  list(
    ek_sd = ek_sd,
    bhat = coef(fit)[2],
    sehat = arm::se.coef(fit)[2],
    skewy = psych::skew(y),
    stdevy = fBasics::stdev(y)
  )
}

# Each error standard deviation repeated 1000 times, grouped by value.
intr <- rep(c(1, 5, 10, 20, 50), each = 1000)

# One regression per entry of intr; map_dfr() binds the returned lists
# row-wise into a single data frame.
purrr::map_dfr(
  intr,
  function(ek_sd) fun_reg(n, 0, ek_sd, x)
)

答案 1 :(得分:0)

另一个选择...

# Simulate `num.y` dependent variables and regress each one on its own
# freshly drawn predictor.
#
# Args:
#   n:     number of observations per regression.
#   num.y: number of dependent variables to simulate.
#   sd:    error standard deviations, one per dependent variable; its length
#          must equal num.y.
#
# Returns a data frame with one row per dependent variable holding the
# estimated slope (b1) and intercept (int).
#
# Stops with an error when length(sd) differs from num.y.
regsim <- function(n = 100, num.y = 5, sd = c(1:5)) {
  if (length(sd) != num.y) {
    stop('length of sd must match number of dependent vars')
  }

  # seq_len() yields an empty sequence for num.y = 0, whereas 1:num.y would
  # iterate over c(1, 0). The index is named k so that it is not overwritten
  # by the predictor x drawn inside the loop body.
  ldply(seq_len(num.y), function(k) {
    e <- rnorm(n, 0, sd = sd[k])
    x <- rnorm(n, 0, 5)
    y <- 0.3 * x + e
    out <- lm(y ~ x)
    # coef(out) is ordered (intercept, slope).
    data.frame(b1 = coef(out)[2], int = coef(out)[1])
  })
}

# Ten dependent variables with error standard deviations 1 through 10.
regsim(num.y = 10, sd = seq_len(10))

            b1        int
1   0.30817303  0.0781049
2   0.38681600 -0.3359067
3   0.24560773 -0.0277561
4   0.08032659  0.1877233
5   0.39873955 -0.6027522
6   0.21729930  0.7384340
7   0.33761456 -0.1053028
8   0.26502006 -0.1851552
9   0.15452261 -1.6334873
10 -0.10496863 -0.3225169

这样您就可以指定因变量的个数以及每个误差项的标准差(SD)。然后,您可以使用replicate按所需的重复次数反复调用该函数。

# Call regsim() ten times with its defaults and keep the results as a list of
# data frames. FALSE is spelled out because F is an ordinary variable that can
# be reassigned.
replicate(10, regsim(), simplify = FALSE)

[[1]]
         b1         int
1 0.3047779 -0.01984306
2 0.3133198 -0.20458410
3 0.2833979 -0.25307502
4 0.3066878 -0.03235019
5 0.1374949  0.10958616

[[2]]
         b1         int
1 0.2902103 -0.12683502
2 0.3499006  0.06691437
3 0.1949797 -0.14371830
4 0.2358269  0.53117467
5 0.2869511  0.16281380

[[3]]
         b1         int
1 0.2952211  0.05905549
2 0.2367774  0.02862166
3 0.0896778 -0.08467935
4 0.2352622 -0.20835837
5 0.3149963  0.07042032

[[4]]
         b1         int
1 0.2946468 -0.08266406
2 0.3322577  0.17558135
3 0.2200087 -0.25778150
4 0.1822915  0.34962679
5 0.2442479  0.34433656

[[5]]
         b1         int
1 0.2882853  0.12677506
2 0.3455534 -0.27885958
3 0.2981193  0.04598347
4 0.3380173  0.05243198
5 0.2148643 -0.09631672

[[6]]
         b1         int
1 0.2962269  0.03743759
2 0.2979327 -0.12830803
3 0.3352781 -0.03935422
4 0.2584965 -0.05924351
5 0.2856802  0.03430055

[[7]]
         b1         int
1 0.2968077 -0.10300109
2 0.2954560  0.25979902
3 0.3276077 -0.07001758
4 0.1825841  0.13508932
5 0.4302788 -0.13951914

[[8]]
         b1         int
1 0.2992147  0.02084806
2 0.2765976  0.07277813
3 0.2469616  0.44580403
4 0.2601966 -0.09849855
5 0.2679183  0.50501652

[[9]]
         b1         int
1 0.2963905  0.03308366
2 0.3356783 -0.06080088
3 0.3199835  0.22533444
4 0.3546083 -0.26909478
5 0.3536241 -0.19795094

[[10]]
         b1         int
1 0.3100336 -0.05228032
2 0.4076447 -0.18715063
3 0.3436858 -0.37518649
4 0.4569368 -0.09114672
5 0.3255668 -0.18738138

答案 2 :(得分:0)

这种写法把对软件包的依赖减少到只剩psych::skew以及可选的ggplot2调用:

library(psych)

# Run one replication with one dependent variable per error standard
# deviation, fitting all regressions in a single lm() call.
#
# Args:
#   n:   number of observations.
#   eks: numeric vector of error standard deviations; one column of y is
#        generated per entry.
#
# Returns a data.frame with one row per entry of `eks`: slope (b_hat),
# intercept (int), skewness of y (skew_y), standard error of the slope
# (se_hat), standard deviation of y (sd_y) and the error SD used (sd_eks).
regsim <- function(n, eks) {
  x <- rnorm(n, 0, .5)

  # Error matrix with one column per entry of eks (for n > 1). vapply() checks
  # that every column has length n; the argument is named ek_sd so it does not
  # shadow the predictor x.
  ek <- vapply(eks, function(ek_sd) rnorm(n, 0, ek_sd), numeric(n))
  y <- 0.3 * x + ek

  # A matrix response makes lm() fit one regression per column of y.
  lms <- lm(y ~ x)
  coefs <- lms[['coefficients']] # rows: intercept, x; one column per response

  # summary() of a multi-response fit is a list with one summary per response;
  # take the standard error of the slope (row 2) from each coefficient table.
  se_hat <- vapply(
    summary(lms),
    function(s) s[['coefficients']][2, 'Std. Error'],
    numeric(1),
    USE.NAMES = FALSE
  )

  data.frame(b_hat = coefs[2, ],
             int = coefs[1, ],
             skew_y = psych::skew(y),
             se_hat = se_hat,
             sd_y = apply(y, 2, sd),
             sd_eks = eks
             )
}

# Simulation settings: observations per draw, replications, error SDs.
n <- 500
iter <- 1000
eks_sd <- c(1, 5)

# Run the simulations and stack the per-replication data frames into one.
sims <- lapply(seq_len(iter), function(i) regsim(n, eks_sd))
results <- do.call(rbind, sims)

# The next parts are optional: tag every row with its replication index and
# print the result as a tibble.
results$iter_id <- rep(seq_len(iter), each = length(eks_sd))
tibble::as_tibble(results)

# Random graph because everyone loves graphs
library(ggplot2)
# Intercept estimate per replication, one facet row per error SD.
ggplot(results, aes(x = iter_id, y = int)) +
  geom_point() +
  facet_grid(rows = vars(sd_eks))

最关键的一点是lm可以接受多个因变量(即矩阵形式的y)。这就是我们把ek创建为矩阵(每一列对应一个误差标准差)的原因。

enter image description here