基于R中其他列中的值对行中的值求和

时间:2015-04-17 11:17:29

标签: r rows plyr

我想根据唯一的Orig-Dest对来汇总Size列中的值,其中A-B不等于B-A。另外,我希望在名为"Semester"的新列中,把第1个月和第2个月的值汇总在一起(我想称之为"1"),并把第3个月和第4个月的值汇总在一起(我想称之为"2")。我的真实数据比这个例子更复杂,但解决这个问题将帮助我写出适用于我的数据的代码。非常感谢。

初始df如下:

# Example data: ten shipments alternating between the A->B and B->A directions.
Orig <- rep(c("A", "B"), times = 5)
Dest <- rep(c("B", "A"), times = 5)
Month <- c(1, 2, 3, 4, 2, 3, 1, 2, 4, 4)
Size <- c(30, 20, 10, 10, 20, 20, 30, 50, 20, 60)
df <- data.frame(Orig = Orig, Dest = Dest, Month = Month, Size = Size)
df
   Orig Dest Month Size
1     A    B     1   30
2     B    A     2   20
3     A    B     3   10
4     B    A     4   10
5     A    B     2   20
6     B    A     3   20
7     A    B     1   30
8     B    A     2   50
9     A    B     4   20
10    B    A     4   60

期望的结果如下:

  Orig Dest Semester Size
1    A    B        1   80
2    B    A        1   70
3    A    B        2   30
4    B    A        2   90

4 个答案:

答案 0 :(得分:4)

使用dplyr

library(dplyr)

# Months 1 and 2 are labelled semester 1; every other month is labelled 2.
# Size is then totalled for each (Semester, Orig, Dest) combination.
df %>%
  mutate(Semester = ifelse(Month %in% c(1, 2), 1, 2)) %>%
  group_by(Semester, Orig, Dest) %>%
  summarise(Size = sum(Size))

  Semester Orig Dest Size
1        1    A    B   80
2        1    B    A   70
3        2    A    B   30
4        2    B    A   90

不过列的顺序与您期望的略有不同。您可以在管道中加入select来重新排列这些列。

答案 1 :(得分:3)

# Load data.table with library() so that a missing package stops the session
# right away; require() would only return FALSE and let later calls fail.
> library(data.table)
> dt1 <- data.table(df)
# Key dt1 by the grouping columns (setkey also sorts the rows by them).
> setkey(dt1, Orig, Dest, Month)
# First pass: total Size for every (Orig, Dest, Month) combination.
> df2 <- dt1[, list(Size = sum(Size)), by = list(Orig, Dest, Month)]
> df2
   Orig Dest Month Size
1:    A    B     1   60
2:    A    B     2   20
3:    A    B     3   10
4:    A    B     4   20
5:    B    A     2   70
6:    B    A     3   20
7:    B    A     4   70


> sapply(df2,class)
     Orig      Dest     Month      Size 
 "factor"  "factor" "numeric" "numeric" 

> library(plyr)
# Recode the month labels into semester labels: "1" and "2" become "1",
# "3" and "4" become "2". The column keeps the name Month but, after this
# line, it is a factor holding the semester.
> df2$Month <- revalue(as.factor(df2$Month), c("1"="1", "2"="1","3"="2", "4"="2"))
> df2
   Orig Dest Month Size
1:    A    B     1   60
2:    A    B     1   20
3:    A    B     2   10
4:    A    B     2   20
5:    B    A     1   70
6:    B    A     2   20
7:    B    A     2   70
# Month holds the semester label at this point, so summing again by
# (Orig, Dest, Month) collapses the monthly totals into semester totals.
> df3 <- df2[, list(Size=sum(Size)), by=list(Orig, Dest,Month)]
> df3
   Orig Dest Month Size
1:    A    B     1   80
2:    A    B     2   30
3:    B    A     1   70
4:    B    A     2   90

答案 2 :(得分:3)

使用data.table的另一个选项:

library(data.table)
# setDT() turns df into a data.table in place and := adds Semester by reference.
# `!Month %in% 1:2` is FALSE for months 1-2 and TRUE otherwise, so adding 1L
# gives semester 1 or 2. The second [ ] totals Size per group.
setDT(df)[, Semester := (!Month %in% 1:2) + 1L][
  , .(Size = sum(Size)), by = .(Semester, Orig, Dest)
]
#    Semester Orig Dest Size
#1:        1    A    B   80
#2:        1    B    A   70
#3:        2    A    B   30
#4:        2    B    A   90

或使用base R中的aggregate:

# cbind(Semester = ...) builds the semester label (1 for months 1-2, else 2)
# inside the formula and names the resulting grouping column "Semester".
aggregate(
  Size ~ Orig + Dest + cbind(Semester = (!Month %in% 1:2) + 1L),
  data = df,
  FUN = sum
)
#   Orig Dest Semester Size
#1    B    A        1   70
#2    A    B        1   80
#3    B    A        2   90
#4    A    B        2   30

答案 3 :(得分:2)

这是使用dplyr的另一种解决方案:

# Derive the semester from the month value itself (months 1-2 -> 1, else 2).
# ntile(Month, n = 2) would instead cut the rows into two equal-sized rank
# buckets, which can place rows of the same month in different groups whenever
# the two semesters do not contain the same number of rows.
group_by(df, Orig, Dest, r = ifelse(Month %in% 1:2, 1L, 2L)) %>%
+     summarise(sum(Size))
Source: local data frame [4 x 4]
Groups: Orig, Dest

  Orig Dest r sum(Size)
1    A    B 1        80
2    A    B 2        30
3    B    A 1        70
4    B    A 2        90