嗨,我有一个dataframe,其中包含三列:名称列,数字列A和数字列B。我要做的是将行按它们在A列中的值进行装箱,然后找到B列中每个合并组的值的标准偏差。
我目前已经弄清楚了如何将垃圾箱设置成相等的长度间隔
sapply(df, function(x) {
bins <- with(x, cut(x$a, breaks = seq( floor(min(x$a)/10)*10, ceiling(max(x$a)/10)*10,
by= 10), include.lowest = T, labels = labels))})
但是我仍然坚持如何将bin应用于数据框并计算B列中每个bin组的一组值的标准偏差。
有人有什么建议吗?谢谢...
答案 0 :(得分:1)
jupyter notebook
答案 1 :(得分:0)
下面是一个示例,该示例基于合并的b
(其中应用了a
,即
ave
的{{1}}
df <- within(df, bins <- cut(a, breaks = seq( floor(min(a)/10)*10, ceiling(max(a)/10)*10, by= 10), include.lowest = T))
df <- within(df, b.sd <- ave(b,bins, FUN = sd))
这样
> df
a b bins b.sd
1 26.550866 -0.05612874 (20,30] 0.4936856
2 37.212390 -0.15579551 (30,40] 0.5699030
3 57.285336 -1.47075238 (50,60] 1.5255800
4 90.820779 -0.47815006 (90,100] 0.6159115
5 20.168193 0.41794156 (20,30] 0.4936856
6 89.838968 1.35867955 (80,90] 1.0255606
7 94.467527 -0.10278773 (90,100] 0.6159115
8 66.079779 0.38767161 (60,70] 1.3737593
9 62.911404 -0.05380504 (60,70] 1.3737593
10 6.178627 -1.37705956 [0,10] 0.4163140
11 20.597457 -0.41499456 (20,30] 0.4936856
12 17.655675 -0.39428995 (10,20] 0.5249513
13 68.702285 -0.05931340 (60,70] 1.3737593
14 38.410372 1.10002537 (30,40] 0.5699030
15 76.984142 0.76317575 (70,80] 0.7825274
16 49.769924 -0.16452360 (40,50] 1.2127445
17 71.761851 -0.25336168 (70,80] 0.7825274
18 99.190609 0.69696338 (90,100] 0.6159115
19 38.003518 0.55666320 (30,40] 0.5699030
20 77.744522 -0.68875569 (70,80] 0.7825274
21 93.470523 -0.70749516 (90,100] 0.6159115
22 21.214252 0.36458196 (20,30] 0.4936856
23 65.167377 0.76853292 (60,70] 1.3737593
24 12.555510 -0.11234621 (10,20] 0.5249513
25 26.722067 0.88110773 (20,30] 0.4936856
26 38.611409 0.39810588 (30,40] 0.5699030
27 1.339033 -0.61202639 [0,10] 0.4163140
28 38.238796 0.34111969 (30,40] 0.5699030
29 86.969085 -1.12936310 (80,90] 1.0255606
30 34.034900 1.43302370 (30,40] 0.5699030
31 48.208012 1.98039990 (40,50] 1.2127445
32 59.956583 -0.36722148 (50,60] 1.5255800
33 49.354131 -1.04413463 (40,50] 1.2127445
34 18.621760 0.56971963 (10,20] 0.5249513
35 82.737332 -0.13505460 (80,90] 1.0255606
36 66.846674 2.40161776 (60,70] 1.3737593
37 79.423986 -0.03924000 (70,80] 0.7825274
38 10.794363 0.68973936 (10,20] 0.5249513
39 72.371095 0.02800216 (70,80] 0.7825274
40 41.127443 -0.74327321 (40,50] 1.2127445
41 82.094629 0.18879230 (80,90] 1.0255606
42 64.706019 -1.80495863 (60,70] 1.3737593
43 78.293276 1.46555486 (70,80] 0.7825274
44 55.303631 0.15325334 (50,60] 1.5255800
45 52.971958 2.17261167 (50,60] 1.5255800
46 78.935623 0.47550953 (70,80] 0.7825274
47 2.333120 -0.70994643 [0,10] 0.4163140
48 47.723007 0.61072635 (40,50] 1.2127445
49 73.231374 -0.93409763 (70,80] 0.7825274
50 69.273156 -1.25363340 (60,70] 1.3737593
数据
set.seed(1)
df <- data.frame(a = runif(50,0,100),
b = rnorm(50))