按组计算分位数,并将结果作为新变量保存到数据框中

时间:2016-04-28 04:47:52

标签: r quantile

我的数据看起来像这样:

# Example data: 27 rows. `id` takes the values 1..9 (three consecutive rows
# each), `yr` cycles through 1, 2, 3, and `gr` / `x` hold the grouping code
# and the value whose quantiles are wanted.
id <- rep(c(1, 2, 3, 4, 5, 6, 7, 8, 9), each = 3)
yr <- rep(c(1, 2, 3), times = 9)
gr <- c(
  rep(c(3, 4, 5), times = 3),
  rep(c(4, 5, 6), times = 3),
  rep(c(5, 6, 7), times = 3)
)
x <- c(
  33, 48, 31, 41, 31, 36, 25, 38, 28,
  17, 39, 53, 60, 60, 19, 39, 34, 47,
  20, 28, 38, 15, 17, 49, 48, 45, 39
)
# Columns are named after the vectors: id, yr, gr, x.
df <- data.frame(id, yr, gr, x)

   id yr gr  x
1   1  1  3 33
2   1  2  4 48
3   1  3  5 31
4   2  1  3 41
5   2  2  4 31
6   2  3  5 36
7   3  1  3 25
8   3  2  4 38
9   3  3  5 28
10  4  1  4 17
11  4  2  5 39
12  4  3  6 53
13  5  1  4 60
14  5  2  5 60
15  5  3  6 19
16  6  1  4 39
17  6  2  5 34
18  6  3  6 47
19  7  1  5 20
20  7  2  6 28
21  7  3  7 38
22  8  1  5 15
23  8  2  6 17
24  8  3  7 49
25  9  1  5 48
26  9  2  6 45
27  9  3  7 39

我想在数据框中创建一个新变量,其中包含 "x" 的分位数,并且这些分位数是在 "yr" 和 "gr" 的每个唯一组合内计算的。也就是说,不是基于示例中全部 27 行数据来求 "x" 的分位数,而是按两个分组变量 yr 和 gr 分别计算分位数。例如,分别计算 yr = 1 且 gr = 3、yr = 1 且 gr = 4 等情况下 "x" 的分位数。

一旦计算出这些值,我希望把它们作为单独的一列(例如 "x_quant")附加到数据框中。

我能够将数据拆分为所需的各个组,也知道如何计算分位数,但我不知道如何把这两个步骤结合起来,以便在现有数据框中创建新列。

任何帮助都将不胜感激!非常感谢!

〜KJ

1 个答案:

答案 0 :(得分:0)

# Append the quantiles of `x`, computed within each (yr, gr) group, to `df`
# as a single string column named `xq_0_25_50_75_100`.
# Expects `df` to already exist with columns `yr`, `gr` and `x`.
# Afterwards `df` (and `df.ordered`) holds the rows sorted by the text group
# key, with the new column added and the temporary key column removed.

# Combine "yr" and "gr" into one sortable grouping key
df$y <- paste(df$yr, "", df$gr)
df.ordered <- df[order(df$y), ] # sort df so each group's rows are adjacent
grp <- split(df.ordered, df.ordered$y)
grp

# Get the quantiles of each group and collapse them into one string per group
# (quantile() defaults to the 0/25/50/75/100% points). vapply() names the
# result by group key and always returns a character vector, even when there
# are no groups.
q <- vapply(
  grp,
  function(g) paste(quantile(g$x), collapse = "  "),
  character(1)
)

# Look up each row's string by its own group key instead of repeating every
# value a fixed 3 times, so groups of any size are labelled correctly.
x_quant <- unname(q[df.ordered$y])

# Append the quantiles back to the data frame under a descriptive column name
df.ordered$xq_0_25_50_75_100 <- x_quant
df.ordered$y <- NULL
df <- df.ordered
df

输出:

> # turn "yr" and "gr" into sortable column
> df$y <- paste(df$yr,"",df$gr)
> df.ordered <- df[order(df$y),] #sort df based on group
> grp <- split(df.ordered,df.ordered$y);grp
$`1  3`
  id yr gr  x    y
1  1  1  3 33 1  3
4  2  1  3 41 1  3
7  3  1  3 25 1  3

$`1  4`
   id yr gr  x    y
10  4  1  4 17 1  4
13  5  1  4 60 1  4
16  6  1  4 39 1  4

$`1  5`
   id yr gr  x    y
19  7  1  5 20 1  5
22  8  1  5 15 1  5
25  9  1  5 48 1  5

$`2  4`
  id yr gr  x    y
2  1  2  4 48 2  4
5  2  2  4 31 2  4
8  3  2  4 38 2  4

$`2  5`
   id yr gr  x    y
11  4  2  5 39 2  5
14  5  2  5 60 2  5
17  6  2  5 34 2  5

$`2  6`
   id yr gr  x    y
20  7  2  6 28 2  6
23  8  2  6 17 2  6
26  9  2  6 45 2  6

$`3  5`
  id yr gr  x    y
3  1  3  5 31 3  5
6  2  3  5 36 3  5
9  3  3  5 28 3  5

$`3  6`
   id yr gr  x    y
12  4  3  6 53 3  6
15  5  3  6 19 3  6
18  6  3  6 47 3  6

$`3  7`
   id yr gr  x    y
21  7  3  7 38 3  7
24  8  3  7 49 3  7
27  9  3  7 39 3  7

> # get quantiles and turn results into string
> q <- vector('list')
> for (i in 1:length(grp)) {
+ a <- quantile(grp[[i]]$x)
+ q[i] <- paste(a[1],"",a[2],"",a[3],"",a[4],"",a[5])
+ }
> x_quant <- unlist(sapply(q, `[`, 1))
> x_quant <- rep(x_quant,each=3)
> # append quantile back to data frame
> df.ordered$xq_0_25_50_75_100 <- x_quant
> df.ordered$y <- NULL
> df <- df.ordered
> df
   id yr gr  x      xq_0_25_50_75_100
1   1  1  3 33     25  29  33  37  41
4   2  1  3 41     25  29  33  37  41
7   3  1  3 25     25  29  33  37  41
10  4  1  4 17   17  28  39  49.5  60
13  5  1  4 60   17  28  39  49.5  60
16  6  1  4 39   17  28  39  49.5  60
19  7  1  5 20   15  17.5  20  34  48
22  8  1  5 15   15  17.5  20  34  48
25  9  1  5 48   15  17.5  20  34  48
2   1  2  4 48   31  34.5  38  43  48
5   2  2  4 31   31  34.5  38  43  48
8   3  2  4 38   31  34.5  38  43  48
11  4  2  5 39 34  36.5  39  49.5  60
14  5  2  5 60 34  36.5  39  49.5  60
17  6  2  5 34 34  36.5  39  49.5  60
20  7  2  6 28 17  22.5  28  36.5  45
23  8  2  6 17 17  22.5  28  36.5  45
26  9  2  6 45 17  22.5  28  36.5  45
3   1  3  5 31 28  29.5  31  33.5  36
6   2  3  5 36 28  29.5  31  33.5  36
9   3  3  5 28 28  29.5  31  33.5  36
12  4  3  6 53     19  33  47  50  53
15  5  3  6 19     19  33  47  50  53
18  6  3  6 47     19  33  47  50  53
21  7  3  7 38   38  38.5  39  44  49
24  8  3  7 49   38  38.5  39  44  49
27  9  3  7 39   38  38.5  39  44  49
>