我刚接触重采样技术和 R 中的自助法(bootstrap),所以任何帮助都将不胜感激。我有一个由不同个体组成的数据框,其中有些个体被测量了不止一次。我想从总体中的每个个体随机抽取一个测量值,然后计算总体均值和标准差。我想多次(500 次左右)重复此过程,并得到两个新的数据框:一个包含每个变量(Dim.1 到 Dim.4)的所有总体均值,另一个包含每个变量的所有总体标准差。由此,我可以提取全局均值和标准差用于下游分析。以下是数据框的样子(“ID”是每个个体的唯一编号,可以看到每个个体的重复测量次数各不相同)。
ID Dim.1 Dim.2 Dim.3 Dim.4
41 0.4001945 1.15899378 0.269197195 0.184791153
14 2.1615710 1.15712356 -0.096055808 0.450943821
63 0.4325496 0.75521068 0.085588532 -0.233144806
53 1.2459718 0.97450610 -0.069171367 -0.613423267
63 1.3380629 0.22606572 -0.061178395 -0.304960508
42 1.6048214 0.94184036 0.232863647 -0.201738198
57 1.3306709 0.80440736 -0.955949551 -0.734022636
53 0.7019118 0.87285991 -0.042557052 -0.146748989
51 0.7235493 0.29946448 0.474477629 0.305810371
53 1.2431220 1.20252749 -0.073627812 0.237740020
41 1.1788653 0.55536570 -0.017354302 0.119014260
14 2.5769809 0.18551630 0.634304132 0.617288243
67 1.0445458 1.47107481 0.024383348 0.111808376
31 0.9759513 1.31091796 -0.008660192 0.189962355
63 1.8621687 0.97137412 0.317014897 -0.390871248
76 0.5905190 1.49817641 -0.374503265 0.142478388
90 2.4323563 0.87696545 0.467220123 0.513197279
67 2.2378032 0.35682721 0.400233674 -0.926848226
41 1.7098808 0.40470067 0.050950910 -0.153059068
97 1.5351169 1.11597681 0.011878347 -0.092047152
63 1.2647155 0.80006707 0.730022680 -0.089726522
57 1.7200676 0.01358165 0.450075592 0.038352174
76 0.6949196 1.36741272 -1.286488394 0.477345585
123 2.4235534 1.69165605 0.528863655 0.447856674
76 -2.4022432 -0.27531557 -1.850999153 2.194893741
117 1.6955740 -1.86088122 1.502655438 0.856026945
117 0.7130716 1.44198379 -1.495098987 -1.021981479
131 0.8425548 1.22970621 -0.160634720 0.005202717
117 1.0913048 1.19834030 -0.240309947 0.279379075
90 2.5787954 0.21638781 0.973339314 0.853752379
105 1.4989440 1.31525062 0.233114414 0.082557111
45 0.4749492 0.36264159 0.016554066 0.434416650
14 1.9841503 -0.18133091 -0.517021686 0.131796394
以下是该数据框的 dput 输出:
structure(list(AnID = structure(c(3L, 1L, 9L, 7L, 9L, 4L, 8L,
7L, 6L, 7L, 3L, 1L, 10L, 2L, 9L, 11L, 12L, 10L, 3L, 13L, 9L,
8L, 11L, 16L, 11L, 15L, 15L, 17L, 15L, 12L, 14L, 5L, 1L), .Label = c("14",
"31", "41", "42", "45", "51", "53", "57", "63", "67", "76", "90",
"97", "105", "117", "123", "131"), class = "factor"), Dim.1 = c(0.400194544195721,
2.16157096683054, 0.432549610256816, 1.24597182598991, 1.33806287869605,
1.60482137307563, 1.33067093524332, 0.701911835019105, 0.723549265733465,
1.24312199041168, 1.17886527411877, 2.57698094739979, 1.04454579781695,
0.975951278566957, 1.86216869726173, 0.590519015534528, 2.43235630542313,
2.23780317751189, 1.70988079418724, 1.53511692947232, 1.26471553939687,
1.72006761902848, 0.694919562457936, 2.42355344632234, -2.40224317003857,
1.69557401848893, 0.713071563313831, 0.84255475961074, 1.09130484807346,
2.57879543707134, 1.49894397171646, 0.474949215360165, 1.9841503256016
), Dim.2 = c(1.15899377720071, 1.15712355628702, 0.755210676050028,
0.974506103663373, 0.226065715930444, 0.941840360304357, 0.804407356238532,
0.872859912826886, 0.299464475124326, 1.2025274866889, 0.55536570304097,
0.185516296049789, 1.47107481283135, 1.31091795925695, 0.971374119614307,
1.49817640676682, 0.876965451353274, 0.356827207847936, 0.404700668672103,
1.11597680662439, 0.800067070614603, 0.0135816493815426, 1.36741271705742,
1.69165605426992, -0.275315573666507, -1.86088122056554, 1.44198379044125,
1.229706212058, 1.19834030462339, 0.216387812905091, 1.31525061699366,
0.362641590025834, -0.181330912913297), Dim.3 = c(0.269197195180612,
-0.0960558078596061, 0.0855885321454752, -0.0691713671666404,
-0.0611783947257435, 0.232863646917399, -0.955949551451659, -0.0425570523689114,
0.474477629049467, -0.0736278121798866, -0.0173543018324465,
0.634304131880689, 0.0243833483864922, -0.00866019164798527,
0.317014896588811, -0.374503264871839, 0.467220123029729, 0.400233673552903,
0.0509509097106227, 0.0118783465387495, 0.730022679967163, 0.450075591988245,
-1.28648839432794, 0.528863655457902, -1.85099915345691, 1.50265543792412,
-1.49509898726221, -0.160634720376254, -0.24030994662375, 0.973339313851613,
0.233114414466102, 0.0165540663395682, -0.517021685999838), Dim.4 = c(0.184791153018369,
0.45094382124022, -0.233144806193005, -0.613423266807646, -0.304960507895512,
-0.201738198311526, -0.734022636110577, -0.146748988783387, 0.305810371055691,
0.237740020179384, 0.11901425952943, 0.61728824337695, 0.111808376374363,
0.189962354663836, -0.390871248426407, 0.14247838773032, 0.513197279323348,
-0.926848226311571, -0.153059067639092, -0.0920471522899872,
-0.0897265219239891, 0.0383521738356584, 0.477345585143069, 0.447856673901548,
2.19489374105159, 0.856026944966164, -1.02198147948597, 0.00520271670521917,
0.279379074573862, 0.853752378937349, 0.0825571109781094, 0.434416649778733,
0.131796393683415)), .Names = c("AnID", "Dim.1", "Dim.2", "Dim.3",
"Dim.4"), class = "data.frame", row.names = c("20", "26", "36",
"46", "49", "52", "75", "93", "94", "110", "118", "124", "132",
"143", "157", "168", "185", "199", "210", "211", "215", "225",
"240", "245", "248", "250", "254", "270", "272", "281", "297",
"322", "337"))
答案 0 :(得分:1)
给你,代码如下:
#' Draw one random row from a data frame.
#'
#' @param df A data.frame or data.table (for example the `.SD` subset
#'   belonging to a single ID).
#' @return One randomly chosen row of `df`. When `df` has no rows it is
#'   returned unchanged.
boot_id <- function(df) {
  n_rows <- nrow(df)
  # Guard the empty case: 1:0 evaluates to c(1, 0), so sampling from it
  # could select row 1 (all NA) or row 0 instead of failing cleanly.
  if (n_rows == 0L) {
    return(df)
  }
  s <- sample.int(n_rows, size = 1L)
  df[s, ]
}
#' Resample one measurement per individual and summarise each variable.
#'
#' For each of `n` iterations, one random row is drawn per `ID` (via
#' `boot_id()`), and the chosen statistic is computed column-wise over
#' the drawn rows.
#'
#' @param df A data.table with a grouping column named `ID`; every other
#'   column is expected to be numeric.
#' @param n Number of resampling iterations.
#' @param f Statistic to compute per column: `"mean"` or `"sd"`.
#' @return A numeric matrix with `n` rows and one column per non-ID
#'   variable (named after that variable); row `i` holds the statistic
#'   for the `i`-th draw.
boot_dat <- function(df, n = 500, f = c("mean", "sd")) {
  f <- match.arg(f)
  # Fail early with a clear message: the grouped `[` call below relies on
  # data.table semantics and on a column literally named "ID".
  stopifnot(
    inherits(df, "data.table"),
    "ID" %in% names(df),
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0
  )

  value_cols <- setdiff(names(df), "ID")
  res <- matrix(
    NA_real_,
    nrow = n,
    ncol = length(value_cols),
    dimnames = list(NULL, value_cols)
  )

  # Choose the column-wise statistic once instead of on every iteration.
  summarise_cols <- if (f == "mean") {
    colMeans
  } else {
    function(x) vapply(x, stats::sd, numeric(1))
  }

  # seq_len() makes n = 0 a no-op; 1:0 would iterate over c(1, 0).
  for (i in seq_len(n)) {
    # .SD is data.table's per-group subset, so this keeps one random row
    # for each ID.
    df2 <- df[, boot_id(.SD), by = "ID"]
    df2$ID <- NULL
    res[i, ] <- summarise_cols(df2)
  }
  res
}
# Driver: build a keyed data.table and run the resampling once per statistic.
library(data.table)

# dt <- <your structure>
new_names <- c("ID", "d1", "d2", "d3", "d4")
names(dt) <- new_names
dt <- data.table(dt)
setkeyv(dt, "ID")

# Each result is a 500-row matrix (default n), one column per variable.
dat_means <- boot_dat(dt, f = "mean")
dat_sds <- boot_dat(dt, f = "sd")