在R中按组对最大的前N个值累积求和,生成新变量

时间:2017-06-30 14:59:43

标签: r data.table dplyr

我正在尝试找出按组对某一列做累积求和的最佳方法。我想做的是:在由 group.1 和 group.2 确定的每个分组内,取 value 的最大值作为新变量 total_top_1;得到该值后,再创建第二个变量 total_top_2,其值为分组内第二大的值加上 total_top_1(即前两大值之和)。数据如下:

    # Sample data: 30 rows split into four group.1/group.2 combinations
    # (A/1: 9 rows, A/2: 5 rows, B/1: 9 rows, B/2: 7 rows).
    df <- data.frame(
      group.1 = rep(c("A", "B"), times = c(14L, 16L)),
      group.2 = rep(c(1L, 2L, 1L, 2L), times = c(9L, 5L, 9L, 7L)),
      value = c(
        56L, 74L, 39L, 86L, 7L, 100L, 18L, 43L, 68L,  # A / 1
        32L, 4L, 17L, 18L, 24L,                       # A / 2
        94L, 11L, 67L, 87L, 96L, 93L, 31L, 29L, 31L,  # B / 1
        25L, 64L, 23L, 44L, 92L, 36L, 93L             # B / 2
      ),
      stringsAsFactors = FALSE
    )

我希望最终结果看起来像这样。

# Expected result: the input columns plus, for every group.1/group.2
# combination, the largest value (total_top_1) and the sum of the two largest
# values (total_top_2), repeated on every row of that combination.
df_output <- data.frame(
  stringsAsFactors = FALSE,
  group.1 = c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
              "A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
              "B", "B", "B", "B", "B", "B"),
  group.2 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
              1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
  value = c(56L, 74L, 39L, 86L, 7L, 100L, 18L, 43L, 68L, 32L, 4L, 17L,
            18L, 24L, 94L, 11L, 67L, 87L, 96L, 93L, 31L, 29L, 31L, 25L,
            64L, 23L, 44L, 92L, 36L, 93L),
  total_top_1 = c(100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 32L,
                  32L, 32L, 32L, 32L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L,
                  96L, 93L, 93L, 93L, 93L, 93L, 93L, 93L),
  total_top_2 = c(186L, 186L, 186L, 186L, 186L, 186L, 186L, 186L, 186L, 56L,
                  56L, 56L, 56L, 56L, 190L, 190L, 190L, 190L, 190L, 190L,
                  190L, 190L, 190L, 185L, 185L, 185L, 185L, 185L, 185L, 185L)
)  # closing parenthesis was missing, which left the call unparseable

理想情况下,我希望能够按我指定的任意个数来执行此操作。例如,我可能需要创建 total_top_1 到 total_top_5。

要获得前1和2的总数,我可以这样做:

# Attempt: keep only the two largest `value` rows of every group.1/group.2
# combination, then add the totals as new columns.
#   total_top_1 - copy of each kept row's own `value`
#   total_top_2 - sum of the kept rows' `value` within the group
df %>%
  arrange(group.1, group.2, desc(value)) %>%  # largest values first
  group_by(group.1, group.2) %>%
  top_n(n = 2, wt = value) %>%                # rows ranked 1 and 2 by value
  mutate(total_top_1 = value, total_top_2 = sum(value))

但是,如果我想要前三名的总和,就需要重复这一步并修改 top_n,而这时 sum 计算的是 total_top_3。我真正想要的是同时得到 total_top_1、total_top_2 和 total_top_3。我认为应该有更好的方法来做到这一点。

1 个答案:

答案 0 :(得分:1)

我认为这就是你想要的。

library(dplyr)     # %>%, group_by(), mutate(), ungroup()
library(reshape2)  # acast()

# Rank the rows inside every group.1/group.2 combination (1 = largest value).
# ties.method = "first" gives tied values distinct, consecutive ranks.
df <- df %>%
  group_by(group.1, group.2) %>%
  dplyr::mutate(top_n = rank(-value, ties.method = "first")) %>%
  ungroup()

# Single key identifying a group.1/group.2 combination; it becomes the row
# names of the wide table and the merge key below.
df$Group <- paste(df$group.1, df$group.2)

# Wide matrix: one row per Group, one column per rank, cells hold `value`.
df1 <- acast(df, Group ~ top_n, value.var = "value")

# Groups with fewer rows than the largest group have no value at the higher
# ranks; filling with 0 keeps their running total flat from there on.
df1[is.na(df1)] <- 0

# Running total along each row: column k is the sum of the k largest values.
df1 <- as.data.frame(t(apply(df1, 1, cumsum)))

# paste0() keeps the names free of the space that paste()'s default separator
# would insert ("total_top_1", not "total_top_ 1").
names(df1) <- paste0("total_top_", colnames(df1))
df1$Group <- rownames(df1)

# Attach the per-group totals to every original row.
df <- merge(df, df1, by = "Group", all.x = TRUE)

这是输出:

> df
   Group group.1 group.2 value top_n total_top_ 1 total_top_ 2 total_top_ 3 total_top_ 4 total_top_ 5 total_top_ 6 total_top_ 7 total_top_ 8 total_top_ 9
1    A 1       A       1    56     5          100          186          260          328          384          427          466          484          491
2    A 1       A       1    74     3          100          186          260          328          384          427          466          484          491
3    A 1       A       1    39     7          100          186          260          328          384          427          466          484          491
4    A 1       A       1    86     2          100          186          260          328          384          427          466          484          491
5    A 1       A       1     7     9          100          186          260          328          384          427          466          484          491
6    A 1       A       1   100     1          100          186          260          328          384          427          466          484          491
7    A 1       A       1    18     8          100          186          260          328          384          427          466          484          491
8    A 1       A       1    43     6          100          186          260          328          384          427          466          484          491
9    A 1       A       1    68     4          100          186          260          328          384          427          466          484          491
10   A 2       A       2    32     1           32           56           74           91           95           95           95           95           95
11   A 2       A       2     4     5           32           56           74           91           95           95           95           95           95
12   A 2       A       2    17     4           32           56           74           91           95           95           95           95           95
13   A 2       A       2    18     3           32           56           74           91           95           95           95           95           95
14   A 2       A       2    24     2           32           56           74           91           95           95           95           95           95
15   B 1       B       1    94     2           96          190          283          370          437          468          499          528          539
16   B 1       B       1    11     9           96          190          283          370          437          468          499          528          539
17   B 1       B       1    67     5           96          190          283          370          437          468          499          528          539
18   B 1       B       1    87     4           96          190          283          370          437          468          499          528          539
19   B 1       B       1    96     1           96          190          283          370          437          468          499          528          539
20   B 1       B       1    93     3           96          190          283          370          437          468          499          528          539
21   B 1       B       1    31     6           96          190          283          370          437          468          499          528          539
22   B 1       B       1    29     8           96          190          283          370          437          468          499          528          539
23   B 1       B       1    31     7           96          190          283          370          437          468          499          528          539
24   B 2       B       2    25     6           93          185          249          293          329          354          377          377          377
25   B 2       B       2    64     3           93          185          249          293          329          354          377          377          377
26   B 2       B       2    23     7           93          185          249          293          329          354          377          377          377
27   B 2       B       2    44     4           93          185          249          293          329          354          377          377          377
28   B 2       B       2    92     2           93          185          249          293          329          354          377          377          377
29   B 2       B       2    36     5           93          185          249          293          329          354          377          377          377
30   B 2       B       2    93     1           93          185          249          293          329          354          377          377          377