我有几个变量,每行100行。要从1到nrows重新采样,下面的代码给出了预期的结果,但它很乏味且不切实际。为了重现这种情况,我们假设y有5行:
y<-rnorm(n=5, mean=10, sd=2)
R=1000 #number of resamplings
boot.means = numeric(R)
for (i in 1:R) { boot.sample = sample(y, 1, replace=T)
boot.means[i] = mean(boot.sample) }
m1<-mean(boot.means)
d1<-sd(boot.means)
cv1 =(d1*100)/m1
R=1000 #number of resamplings
boot.means = numeric(R)
for (i in 1:R) { boot.sample = sample(y, 2, replace=T)
boot.means[i] = mean(boot.sample) }
m2<-mean(boot.means)
d2<-sd(boot.means)
cv2 =(d2*100)/m2
R=1000 #number of resamplings
boot.means = numeric(R)
for (i in 1:R) { boot.sample = sample(y, 3, replace=T)
boot.means[i] = mean(boot.sample) }
m3<-mean(boot.means)
d3<-sd(boot.means)
cv3 =(d3*100)/m3
R=1000 #number of resamplings
boot.means = numeric(R)
for (i in 1:R) { boot.sample = sample(y, 4, replace=T)
boot.means[i] = mean(boot.sample) }
m4<-mean(boot.means)
d4<-sd(boot.means)
cv4 =(d4*100)/m4
R=1000 #number of resamplings
boot.means = numeric(R)
for (i in 1:R) { boot.sample = sample(y, 5, replace=T)
boot.means[i] = mean(boot.sample) }
m5<-mean(boot.means)
d5<-sd(boot.means)
cv5 =(d5*100)/m5
CV.OK<-(c(cv1,cv2,cv3,cv4,cv5))
plot(CV.OK)
我想使用类似下面的代码,但它会产生意想不到的结果。拜托,有人可以帮忙。感谢。
R = 1000 #number of resamplings
boot.sample=seq(1,5, by=1)
boot.means = numeric(R)
boot.sd = numeric(R)
m = 5
d = 5
for (i in 1:5) {
for (j in 1:R) {
boot.sample[i] = sample(y, i, replace=T)
boot.means[j] = mean(boot.sample[i])
boot.sd[j] = sd(boot.sample[i])
m[i]=mean(boot.means[j])
d[i]=mean(boot.sd[j])
}
}
CV.Fail<-(d*100)/m
答案 0 :(得分:0)
我想你想要这个:
y<-rnorm(n=5, mean=10, sd=2)
R = 1000 #number of resamplings
CVs <- numeric(5)
for (i in 1:5) {
boot.means = numeric(R)
for (j in 1:R) {
boot.sample = sample(y, i, replace=T)
boot.means[j] = mean(boot.sample)
}
m=mean(boot.means)
d=sd(boot.means)
CVs[i] = (d*100)/m
}
plot(CVs)
答案 1 :(得分:0)
library(plyr)
library(dplyr)
# dummy data set
data_set = data.frame(value = runif(200), group = rep(c("a", "b"), each=100))
# create a function that takes the sample size as an argument
iterative_sample = function(sample_size, data){
# group the data (your 'n' equals the number of groups-
# here thats 'a' and 'b'
sample_temp = dplyr::group_by(data, group) %>%
# take x (sample size) samples from each group
sample_n(sample_size, replace=T) %>%
# compute summary stats for each group
summarize(mean = mean(value), sd = sd(value)) %>%
# attach the sample size to keep track
mutate(sample_size = sample_size)
# we must return a dataframe to uses ldply later on
return(sample_temp)
}
# thats the vector we are going to iterate over using ldply
sample_vect = c(1:2)
# ldplyr (plyr package) takes a list or vector and returns a dataframe and our custom
# function -checkout the manpage
# ?ldply
# ...
#
#
# .data: list to be processed
#
# .fun: function to apply to each piece
#
# ...: other arguments passed on to ‘.fun’
#
# ...
#
ldply(.data = sample_vect, .fun = iterative_sample, data_set)