R按多个组应用加权运算

时间:2019-04-26 09:16:26

标签: r data-manipulation

嗨,我有一个像这样的数据集:

# Example data: one row per observation with its city, area, weight and tax.
City <- rep(c(1, 2, 3), times = c(5, 5, 4))
Area <- c(
  "A", "B", "A", "B", "A",
  "A", "B", "B", "B", "A",
  "A", "B", "A", "A"
)
Weights <- c(
  2.4, 1.9, 0.51, 0.7, 2.2,
  1.5, 1.86, 1.66, 1.09, 2.57,
  2.4, 0.9, 3.4, 3.7
)
Tax <- c(
  16, 93, 96, 44, 67,
  73, 12, 65, 81, 22,
  39, 94, 41, 30
)
z <- data.frame(City = City, Area = Area, Weights = Weights, Tax = Tax)

看起来像这样:

enter image description here

我要做的是获取每个城市和每个地区的加权税。

例如,上面第1行的计算值为:

2.4 * 16 /(2.40 + 0.51 + 2.20)等。

我可以使用以下函数做到这一点:

# Compute each row's weighted tax within its (City, Area) group.
#
# For every row the value is Weights * Tax / sum(Weights), where the sum runs
# over all rows sharing the same City and Area.
#
# Args:
#   z: data.frame with columns City, Area, Weights and Tax.
#
# Returns:
#   z with an extra numeric column `area_dat_n`. Rows are ordered by City and
#   then by Area, each in order of first appearance in z; the original row
#   names are kept.
weighted_tax <- function(z) {
  # Per-row total weight of the row's (City, Area) group, computed in one
  # vectorised pass instead of growing data frames with rbind() in a loop.
  group_weight <- ave(z$Weights, z$City, z$Area, FUN = sum)
  z$area_dat_n <- z$Weights * z$Tax / group_weight

  # order() is stable, so rows within a group keep their original sequence.
  row_order <- order(
    match(z$City, unique(z$City)),
    match(z$Area, unique(z$Area))
  )
  z[row_order, , drop = FALSE]
}
# Apply the helper to the example data; `tax` holds the rows of z plus the
# computed area_dat_n column.
tax=weighted_tax(z)

有没有更简单/更干净的方法来实现这一目标?预先感谢。

2 个答案:

答案 0 :(得分:2)

使用dplyr

library(dplyr)

# Within each (City, Area) group, scale every row's tax by that row's share of
# the group's total weight. The result is still grouped by City and Area.
z %>%
  group_by(City, Area) %>%
  mutate(Weighted_tax = Tax * Weights / sum(Weights))

输出:

# A tibble: 14 x 5
# Groups:   City, Area [6]
    City Area  Weights   Tax Weighted_tax
   <dbl> <fct>   <dbl> <dbl>        <dbl>
 1     1 A        2.4     16         7.51
 2     1 B        1.9     93        68.0 
 3     1 A        0.51    96         9.58
 4     1 B        0.7     44        11.8 
 5     1 A        2.2     67        28.8 
 6     2 A        1.5     73        26.9 
 7     2 B        1.86    12         4.84
 8     2 B        1.66    65        23.4 
 9     2 B        1.09    81        19.2 
10     2 A        2.57    22        13.9 
11     3 A        2.4     39         9.85
12     3 B        0.9     94        94.  
13     3 A        3.4     41        14.7 
14     3 A        3.7     30        11.7 

答案 1 :(得分:1)

我们也可以在 base R 中使用 by 完成此操作

# Split z into one piece per (City, Area) combination, append the weighted tax
# to each piece, then stack the pieces back into a single data frame.
do.call(
  rbind,
  by(z, z[c("City", "Area")], function(x) {
    cbind(x, area.dat.n = with(x, Weights * Tax / sum(Weights)))
  })
)
#    City Area Weights Tax area.dat.n
# 1     1    A    2.40  16   7.514677
# 3     1    A    0.51  96   9.581213
# 5     1    A    2.20  67  28.845401
# 6     2    A    1.50  73  26.904177
# 10    2    A    2.57  22  13.891892
# 11    3    A    2.40  39   9.852632
# 13    3    A    3.40  41  14.673684
# 14    3    A    3.70  30  11.684211
# 2     1    B    1.90  93  67.961538
# 4     1    B    0.70  44  11.846154
# 7     2    B    1.86  12   4.841649
# 8     2    B    1.66  65  23.405640
# 9     2    B    1.09  81  19.151844
# 12    3    B    0.90  94  94.000000

或使用ave

# Append the weighted tax using ave(), which returns each row's group total of
# Weights (grouped by City and Area) aligned with the rows of z. The arithmetic
# is done directly on the columns: calling apply() on the data frame would
# coerce it to a character matrix because Area is not numeric, and the
# multiplication would then fail.
cbind(
  z,
  area.dat.n = with(z, Weights * Tax / ave(Weights, City, Area, FUN = sum))
)
#    City Area Weights Tax area.dat.n
# 1     1    A    2.40  16   7.514677
# 2     1    B    1.90  93  67.961538
# 3     1    A    0.51  96   9.581213
# 4     1    B    0.70  44  11.846154
# 5     1    A    2.20  67  28.845401
# 6     2    A    1.50  73  26.904177
# 7     2    B    1.86  12   4.841649
# 8     2    B    1.66  65  23.405640
# 9     2    B    1.09  81  19.151844
# 10    2    A    2.57  22  13.891892
# 11    3    A    2.40  39   9.852632
# 12    3    B    0.90  94  94.000000
# 13    3    A    3.40  41  14.673684
# 14    3    A    3.70  30  11.684211

数据

# Example data used by the answers above; Area is stored as a factor with
# levels "A" and "B".
z <- data.frame(
  City = rep(c(1, 2, 3), times = c(5, 5, 4)),
  Area = factor(
    c("A", "B", "A", "B", "A", "A", "B", "B", "B", "A", "A", "B", "A", "A"),
    levels = c("A", "B")
  ),
  Weights = c(
    2.4, 1.9, 0.51, 0.7, 2.2,
    1.5, 1.86, 1.66, 1.09, 2.57,
    2.4, 0.9, 3.4, 3.7
  ),
  Tax = c(
    16, 93, 96, 44, 67,
    73, 12, 65, 81, 22,
    39, 94, 41, 30
  )
)