根据2个或更多不同的过滤器划分变量

时间:2017-09-11 16:47:12

标签: r dplyr mutate

type在2001年和2011年为0,从{2000年到2020年的所有其他年份为1time为年(2000:2020),GEO1_AM }是区域的代码,totals是每个区域的人,TOT每次都是人。我想在数据框中添加另一列,factor2time

的数据框进行子集化
dt %>%
group_by(type, time, GEO1_AM) %>%
mutate(factor2=((factor1[time==2011]-factor1[time==2001])*.1*9 + 
totals[time==2011]/TOT[time==2001]*totals[time==2020]))

我无法运行,因为它给了我错误!

    type    time    GEO1_AM totals  TOT     factor1     factor2
     0      2001    51901   1103170 3265600 0.337815409 1030652.924
     0      2001    51902   143830  3265600 0.044043974 129617.8109
     0      2001    51903   281810  3265600 0.086296546 255477.4554
     0      2001    51904   277140  3265600 0.084866487 259294.0418
     0      2001    51905   239720  3265600 0.073407643 228274.562
     0      2001    51906   303960  3265600 0.093079373 229637.6104
     0      2001    51907   284500  3265600 0.087120284 246383.8565
     0      2001    51908   288950  3265600 0.088482974 246753.8304
     0      2001    51909   149360  3265600 0.045737384 136189.7353
     0      2001    51910   56100   3265600 0.017179079 50521.05616
     0      2001    51911   137060  3265600 0.041970848 125879.1172

dt = structure(list(type = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1), time = c(2001L, 2001L, 2001L, 2001L, 2001L, 
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2011L, 2011L, 2011L, 
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2000L, 
2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 
2012L, 2013L, 2014L, 2015L, 2016L, 2017L, 2018L, 2019L, 2020L
), GEO1_AM = c(51901L, 51902L, 51903L, 51904L, 51905L, 51906L, 
51907L, 51908L, 51909L, 51910L, 51911L, 51901L, 51902L, 51903L, 
51904L, 51905L, 51906L, 51907L, 51908L, 51909L, 51910L, 51911L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L), totals = c(1103170, 143830, 281810, 
277140, 239720, 303960, 284500, 288950, 149360, 56100, 137060, 
1058580, 133130, 262400, 266320, 234460, 235860, 253060, 253440, 
139880, 51890, 129290, 3069591, 3033896, 3017814, 3000608, 2981252, 
2958500, 2933053, 2908216, 2888586, 2877308, 2881917, 2893514, 
2906218, 2916955, 2924817, 2930439, 2934146, 2936713, 2938682
), TOT = c(3265600, 3265600, 3265600, 3265600, 3265600, 3265600, 
3265600, 3265600, 3265600, 3265600, 3265600, 3018310, 3018310, 3018310,     
3018310, 3018310, 3018310, 3018310, 3018310, 3018310, 3018310, 3018310,     3069591, 3033896, 3017814, 3000608, 2981252, 2958500, 2933053, 2908216, 2888586,     2877308, 2881917, 2893514, 2906218, 2916955, 2924817, 2930439, 2934146, 2936713,     2938682), factor1 = c(0.33781540911318, 0.0440439735423812, 0.086296545810877,     0.0848664870161685, 0.0734076433121019, 0.0930793728564429, 0.0871202841744243, 
0.0884829740323371, 0.0457373836354728, 0.0171790788829005,     0.0419708476237139, 0.350719442336937, 0.044107464110711, 0.0869360668718588,     0.0882348068952493, 0.0776792310928964, 0.0781430668155358, 0.0838416199793924, 0.083967518246966,  0.0463438149162942, 0.0171917397484023, 0.0428352289857569, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("type", "time",     "GEO1_AM", "totals", "TOT", "factor1"), class = c("grouped_df", "tbl_df", "tbl",     "data.frame"), row.names = c(NA, -41L), vars = c("type", "time", "GEO1_AM"),     labels = structure(list(type = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), time = c(2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 
2001L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2000L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L, 2018L, 2019L, 2020L), GEO1_AM = c(51901L, 51902L, 51903L, 51904L, 51905L, 51906L, 51907L, 51908L, 51909L,     51910L, 51911L, 51901L, 51902L, 51903L, 51904L, 51905L, 51906L, 51907L, 51908L, 
51909L, 51910L, 51911L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,     51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L)), class = "data.frame", row.names =     c(NA, -41L), vars = c("type", "time", "GEO1_AM"), drop = TRUE, .Names = c("type", "time", "GEO1_AM")), indices = list(0L, 1L, 2L, 3L, 4L, 5L, 6L,     7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L), drop = TRUE, group_sizes 
= c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)

0 个答案:

没有答案