Question

我的模拟目的是评估在以下几种因素组合下各检验的第一类错误率（Type I error rate）。

样本量 - （10,10），（10,25），（25,25），（25,50），（25,100），（50,25），（50,100），（100,25），（100,100）
标准差比率 - （1.00,1.50,2.00,2.50,3.00和3.50）
具有不等偏度和相等偏度的伽马分布

所涉及的两样本检验是合并方差 t 检验、Welch t 检验以及 Mann-Whitney 检验。我尝试按上述因素组合修改代码。

########################################
# Normal-distribution setup
#
# Monte Carlo estimate of the Type I error rate of three two-sample tests
# (pooled-variance t test, Welch t test, Mann-Whitney test). Both samples
# are drawn from normal distributions with the same mean (5), so every
# rejection is a Type I error. The design crosses 9 sample-size pairs with
# 6 standard-deviation ratios (sd1 / sd2 = 1.0, 1.5, ..., 3.5).
#
# Result: `nresult`, a data frame with one row per design cell holding
# m, n, sds1, sds2 and the empirical rejection rate of each test.

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Sample-size pairs: each column of the matrix is one (m, n) pair.
sample_sizes <- matrix(
  c(10, 10, 10, 25, 25, 25, 25, 50, 25, 100, 50, 25, 50, 100, 100, 25,
    100, 100),
  nrow = 2
)

# Standard deviations, derived from the ratios of the design so that no
# level can be dropped by accident (sd1 = 4, 6, 8, 10, 12, 14; sd2 = 4).
sd_ratios <- c(1.00, 1.50, 2.00, 2.50, 3.00, 3.50)
sd_base <- 4
sd1 <- sd_base * sd_ratios
sd2 <- rep(sd_base, length(sd1))

# One sd2 entry per design row (sample-size pairs vary fastest).
sds2 <- rep(sd2, each = ncol(sample_sizes))

# expand.grid() varies its first argument fastest, so the second sample
# size cycles through all 9 pairs within each value of sd1.
ss_sds1 <- expand.grid(sample_sizes[2, ], sd1)

# Full design: 9 sample-size pairs x 6 sd1 values = 54 rows. The first
# sample size is recycled once per sd1 value to line up with `ss_sds1`.
all_combine <- cbind(rep(sample_sizes[1, ], length(sd1)), ss_sds1, sds2)
colnames(all_combine) <- c("m", "n", "sds1", "sds2")

# Number of simulated data sets per design row.
nSims <- 10000

# Significance level used for every test.
alpha <- 0.05

# Scratch storage for the p-values of one design row; it is overwritten
# completely on every pass of the outer loop.
p_values <- matrix(
  0,
  nrow = nSims,
  ncol = 3,
  dimnames = list(NULL, c("equal", "unequal", "mann"))
)

# Empirical Type I error rate of each test, one entry per design row.
equal1 <- unequal4 <- mann7 <- numeric(nrow(all_combine))

# Step through the design rows.
for (ss in seq_len(nrow(all_combine))) {
  m <- all_combine[ss, "m"]
  n <- all_combine[ss, "n"]
  sd_x <- all_combine[ss, "sds1"]
  sd_y <- all_combine[ss, "sds2"]

  for (sim in seq_len(nSims)) {
    # Two normal samples with equal means, so the null hypothesis is true.
    x <- rnorm(m, mean = 5, sd = sd_x)
    y <- rnorm(n, mean = 5, sd = sd_y)

    p_values[sim, "equal"] <- t.test(x, y, var.equal = TRUE)$p.value
    p_values[sim, "unequal"] <- t.test(x, y, var.equal = FALSE)$p.value
    p_values[sim, "mann"] <- wilcox.test(x, y)$p.value
  }

  # Proportion of simulations in which each test rejected at level alpha.
  rejection_rates <- colMeans(p_values <= alpha)
  equal1[ss] <- rejection_rates[["equal"]]
  unequal4[ss] <- rejection_rates[["unequal"]]
  mann7[ss] <- rejection_rates[["mann"]]
}

# Attach the estimated error rates to the design.
nresult <- cbind(all_combine, equal1, unequal4, mann7)

# Save the whole workspace to disk.
save.image(file = "normal.data")

我是 R 的新手，现在已经完成了正态分布的代码，还必须使用 if else 再添加两个关于伽马分布的模拟……有人能提供一些建议，说明如何把代码从正态分布改成伽马分布吗？我现在卡在这一部分了……

求助！！上面的代码有几处给出了 0.00 的结果，我已经检查了很多遍，但没有发现任何错误。请帮帮忙。

Answer 1

这是我目前的代码：

 ########################################
    #for normal distribution setup

# Monte Carlo estimate of the Type I error rate of three two-sample tests
# (pooled-variance t test, Welch t test, Mann-Whitney test). Both samples
# are drawn from normal distributions with the same mean (5), so every
# rejection is a Type I error. The design crosses 9 sample-size pairs with
# 6 standard-deviation ratios (sd1 / sd2 = 1.0, 1.5, ..., 3.5).
#
# Result: `nresult`, a data frame with one row per design cell holding
# m, n, sds1, sds2 and the empirical rejection rate of each test.

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Sample-size pairs: each column of the matrix is one (m, n) pair.
sample_sizes <- matrix(
  c(10, 10, 10, 25, 25, 25, 25, 50, 25, 100, 50, 25, 50, 100, 100, 25,
    100, 100),
  nrow = 2
)

# Standard deviations, derived from the ratios of the design so that no
# level can be dropped by accident (sd1 = 4, 6, 8, 10, 12, 14; sd2 = 4).
sd_ratios <- c(1.00, 1.50, 2.00, 2.50, 3.00, 3.50)
sd_base <- 4
sd1 <- sd_base * sd_ratios
sd2 <- rep(sd_base, length(sd1))

# One sd2 entry per design row (sample-size pairs vary fastest).
sds2 <- rep(sd2, each = ncol(sample_sizes))

# expand.grid() varies its first argument fastest, so the second sample
# size cycles through all 9 pairs within each value of sd1.
ss_sds1 <- expand.grid(sample_sizes[2, ], sd1)

# Full design: 9 sample-size pairs x 6 sd1 values = 54 rows. The first
# sample size is recycled once per sd1 value to line up with `ss_sds1`.
all_combine <- cbind(rep(sample_sizes[1, ], length(sd1)), ss_sds1, sds2)
colnames(all_combine) <- c("m", "n", "sds1", "sds2")

# Number of simulated data sets per design row.
nSims <- 10000

# Significance level used for every test.
alpha <- 0.05

# Scratch storage for the p-values of one design row; it is overwritten
# completely on every pass of the outer loop.
p_values <- matrix(
  0,
  nrow = nSims,
  ncol = 3,
  dimnames = list(NULL, c("equal", "unequal", "mann"))
)

# Empirical Type I error rate of each test, one entry per design row.
equal1 <- unequal4 <- mann7 <- numeric(nrow(all_combine))

# Step through the design rows.
for (ss in seq_len(nrow(all_combine))) {
  m <- all_combine[ss, "m"]
  n <- all_combine[ss, "n"]
  sd_x <- all_combine[ss, "sds1"]
  sd_y <- all_combine[ss, "sds2"]

  for (sim in seq_len(nSims)) {
    # Two normal samples with equal means, so the null hypothesis is true.
    x <- rnorm(m, mean = 5, sd = sd_x)
    y <- rnorm(n, mean = 5, sd = sd_y)

    p_values[sim, "equal"] <- t.test(x, y, var.equal = TRUE)$p.value
    p_values[sim, "unequal"] <- t.test(x, y, var.equal = FALSE)$p.value
    p_values[sim, "mann"] <- wilcox.test(x, y)$p.value
  }

  # Proportion of simulations in which each test rejected at level alpha.
  rejection_rates <- colMeans(p_values <= alpha)
  equal1[ss] <- rejection_rates[["equal"]]
  unequal4[ss] <- rejection_rates[["unequal"]]
  mann7[ss] <- rejection_rates[["mann"]]
}

# Attach the estimated error rates to the design.
nresult <- cbind(all_combine, equal1, unequal4, mann7)

# Save the whole workspace to disk.
save.image(file = "normal.data")

Answer 2

我修改了您的代码来检验第一类错误率。对于各个因素组合，我没有使用多层嵌套的 for 循环，而是更倾向于把所有组合放进一个矩阵，然后对该矩阵的每一行进行模拟。这样更便于绘制结果。请注意，为了加快计算速度，我减少了模拟次数（simulations，即修改了 nSims），您需要把它改回所需的值。最后，您可以把三种检验的结果与各个因素组合合并在一起。

我不清楚您用 `(ss-1)*nsds+sim` 想实现什么，所以选择把它改掉了。

# Normal-distribution setup
#
# Monte Carlo estimate of the Type I error rate of the pooled-variance
# t test, the Welch t test and the Mann-Whitney test. Both samples are
# normal with mean 5; the second sample always has SD 4 while the SD of
# the first sample varies. Result: `answer`, one row per combination of
# sample sizes and SD with the empirical rejection rate of each test.

# Sample-size pairs: each column of the matrix is one (ss1, ss2) pair.
sample_sizes <- matrix(
  c(10, 10, 10, 25, 25, 25, 25, 50, 25, 100, 50, 25, 50, 100, 100, 25,
    100, 100),
  nrow = 2
)

# Standard deviations of the first sample.
sds <- c(4, 6, 8, 10, 12, 14)

# All combinations of the second sample size with the SDs; expand.grid()
# varies its first argument fastest.
all_combn <- expand.grid(sample_sizes[2, ], sds)

# Tack on the first sample size, recycled once per SD so that the
# (ss1, ss2) pairs stay aligned.
all_combn <- cbind(rep(sample_sizes[1, ], length(sds)), all_combn)
colnames(all_combn) <- c("ss1", "ss2", "sds")

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Number of simulations per combination (kept small for speed; increase
# it for a real study).
nSims <- 500

# p-values of the current combination: column 1 = pooled t test,
# column 2 = Welch t test, column 3 = Mann-Whitney test.
store_sim <- matrix(0, nrow = nSims, ncol = 3)

# Significance level for the whole simulation.
alpha <- 0.05

# Empirical Type I error rate of each test, one entry per combination.
equal <- unequal <- mann <- rep(0, nrow(all_combn))

# Step through the rows of `all_combn`.
for (ss in seq_len(nrow(all_combn))) {
  m <- all_combn[ss, 1]
  n <- all_combn[ss, 2]
  sd_x <- all_combn[ss, 3]

  for (sim in seq_len(nSims)) {
    # Two normal samples with equal means, so the null hypothesis is true.
    x <- rnorm(m, 5, sd_x)
    y <- rnorm(n, 5, 4)

    store_sim[sim, 1] <- t.test(x, y, var.equal = TRUE)$p.value
    store_sim[sim, 2] <- t.test(x, y, var.equal = FALSE)$p.value
    store_sim[sim, 3] <- wilcox.test(x, y)$p.value
  }

  # Share of simulations rejecting at level alpha (reject when p <= alpha).
  equal[ss] <- mean(store_sim[, 1] <= alpha)
  unequal[ss] <- mean(store_sim[, 2] <= alpha)
  # The Mann-Whitney p-values live in column 3, not column 2.
  mann[ss] <- mean(store_sim[, 3] <= alpha)
}

# Combine the design with the estimated error rates.
answer <- cbind(all_combn, equal, unequal, mann)

head(answer)

  ss1 ss2 sds equal unequal  mann
1  10  10   4 0.070   0.062 0.062
2  10  25   4 0.046   0.048 0.048
3  25  25   4 0.048   0.048 0.048
4  25  50   4 0.038   0.048 0.048
5  25 100   4 0.058   0.054 0.054
6  50  25   4 0.048   0.054 0.054

R：为什么代码的结果中显示 0.00

2 个答案: