在split()之后使用mutate()

时间:2019-04-15 18:11:35

标签: r list dplyr

我有一个多变量数据集,我想对因子 Crop 的每个水平分别应用 mutate() 函数。我目前能想到的最好的代码如下,但运行时报错了。我该如何解决?先谢谢了。

# NOTE: this is the failing attempt from the question. mutate() is called on
# the result of split(), and the error message below reports that this result
# is an object of class "list", for which mutate has no method.
blk_sum%>%
  group_by(Rotation, Herbicide, Cohort, Crop)%>%
  select(Dens,Event)%>%
  split(.$Crop)%>%
  mutate(.~Dens/first(Dens)) 

Error in UseMethod("mutate_") : 
  no applicable method for 'mutate_' applied to an object of class "list"

> dput(blk_sum[1:40,])
structure(list(Rotation = c("2-year", "2-year", "2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year", 
"3-year", "3-year", "3-year", "3-year", "3-year", "3-year", "3-year", 
"3-year"), Herbicide = c("conv", "conv", "conv", "conv", "conv", 
"conv", "conv", "conv", "conv", "conv", "conv", "conv", "conv", 
"conv", "conv", "conv", "conv", "conv", "conv", "conv", "conv", 
"conv", "conv", "low", "low", "low", "low", "low", "low", "low", 
"low", "low", "conv", "conv", "conv", "conv", "conv", "conv", 
"conv", "conv"), Crop = c("corn", "corn", "corn", "corn", "corn", 
"corn", "corn", "corn", "corn", "soybean", "soybean", "soybean", 
"soybean", "soybean", "soybean", "soybean", "soybean", "soybean", 
"soybean", "soybean", "soybean", "soybean", "soybean", "corn", 
"corn", "corn", "corn", "corn", "corn", "corn", "corn", "corn", 
"corn", "corn", "corn", "corn", "corn", "corn", "corn", "corn"
), Event = c(1, 1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 1, 1, 2, 2, 2, 
2, 3, 3, 3, 4, 4, 5, 1, 1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 1, 2, 2, 
2, 3, 3), Cohort = c(1, 2, 3, 1, 2, 3, 1, 2, 1, 1, 2, 3, 4, 1, 
2, 3, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 1, 2, 3, 1, 2, 1, 1, 2, 3, 
1, 2, 3, 1, 2), mean = c(482, 0.25, 0.666666666666667, 17.25, 
0.5, 0, 8.25, 0, 6, 10, 2.75, 20.375, 1.375, 5, 1.625, 15.25, 
0.833333333333333, 2.875, 1.5, 15, 2.125, 2.16666666666667, 1.66666666666667, 
0.25, 76.5, 2, 0, 9, 1.33333333333333, 0.5, 5.75, 0.5, 89.75, 
0.5, 0, 1.5, 0.5, 0, 0, 0.25), sd = c(539.175914397766, 0.5, 
1.15470053837925, 23.4858113194612, 1, 0, 15.1739909054935, 0, 
11.3431330181157, 9.68061391205567, 4.23421438150827, 19.5224230053546, 
1.68501801601221, 4.20883424647321, 3.42000417710689, 13.4456578227216, 
1.16904519445001, 2.23207142742853, 2.72554057547699, 12.4257796536073, 
1.35620268186054, 3.06050104830347, 1.50554530541816, 0.5, 17.3301279087413, 
2, 0, 4.24264068711928, 1.15470053837925, 1, 4.3493294502333, 
1, 38.8705115308079, 1, 0, 2.38047614284762, 1, 0, 0, 0.5), Dens = c(79.2763157894737, 
0.0411184210526316, 0.109649122807018, 2.83717105263158, 0.0822368421052632, 
0, 1.35690789473684, 0, 0.986842105263158, 1.64473684210526, 
0.452302631578947, 3.35115131578947, 0.226151315789474, 0.822368421052632, 
0.267269736842105, 2.50822368421053, 0.137061403508772, 0.472861842105263, 
0.246710526315789, 2.46710526315789, 0.349506578947368, 0.356359649122807, 
0.274122807017544, 0.0411184210526316, 12.5822368421053, 0.328947368421053, 
0, 1.48026315789474, 0.219298245614035, 0.0822368421052632, 0.945723684210526, 
0.0822368421052632, 14.7615131578947, 0.0822368421052632, 0, 
0.246710526315789, 0.0822368421052632, 0, 0, 0.0411184210526316
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-40L), groups = structure(list(Rotation = c("2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year", 
"2-year", "2-year", "2-year", "2-year", "3-year", "3-year", "3-year"
), Herbicide = c("conv", "conv", "conv", "conv", "conv", "conv", 
"conv", "conv", "conv", "low", "low", "low", "low", "conv", "conv", 
"conv"), Crop = c("corn", "corn", "corn", "corn", "soybean", 
"soybean", "soybean", "soybean", "soybean", "corn", "corn", "corn", 
"corn", "corn", "corn", "corn"), Event = c(1, 2, 3, 4, 1, 2, 
3, 4, 5, 1, 2, 3, 4, 1, 2, 3), .rows = list(1:3, 4:6, 7:8, 9L, 
    10:13, 14:17, 18:20, 21:22, 23L, 24:26, 27:29, 30:31, 32L, 
    33:35, 36:38, 39:40)), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE)) 

2 个答案:

答案 0 :(得分:1)

这能满足您的需求吗?

# Split the data into one data frame per Crop level.
# (Fixed: the data object from the question is `blk_sum`, not `bulk_sum`.)
splitted <- blk_sum %>%
  split(.$Crop)

# Apply the grouped mutate to each piece, then stack the pieces back into a
# single data frame. `result` is Dens divided by the first Dens of its group.
lapply(splitted, function(df) {
  df %>%
    group_by(Rotation, Herbicide, Cohort, Crop) %>%
    mutate(result = Dens / first(Dens))
}) %>%
  bind_rows()

答案 1 :(得分:1)

我们可以使用purrr包中的map_dfr。下面的方法本质上与@pythonjokeun的解决方案相同,但map_dfr更容易直接放进管道中。

library(dplyr)
library(purrr)

# Split the grouped data by Crop, add the ratio column to every piece, and
# row-bind the pieces back together in one step with map_dfr().
dat <- blk_sum %>%
  group_by(Rotation, Herbicide, Cohort, Crop) %>%
  select(Dens, Event) %>%
  split(.$Crop) %>%
  map_dfr(function(piece) {
    # Dens relative to the first Dens value within each group of the piece
    mutate(piece, result = Dens / first(Dens))
  })
dat
# # A tibble: 40 x 7
# # Groups:   Rotation, Herbicide, Cohort, Crop [13]
#   Rotation Herbicide Cohort Crop     Dens Event result
#   <chr>    <chr>      <dbl> <chr>   <dbl> <dbl>  <dbl>
# 1 2-year   conv           1 corn  79.3        1 1     
# 2 2-year   conv           2 corn   0.0411     1 1     
# 3 2-year   conv           3 corn   0.110      1 1     
# 4 2-year   conv           1 corn   2.84       2 0.0358
# 5 2-year   conv           2 corn   0.0822     2 2     
# 6 2-year   conv           3 corn   0          2 0     
# 7 2-year   conv           1 corn   1.36       3 0.0171
# 8 2-year   conv           2 corn   0          3 0     
# 9 2-year   conv           1 corn   0.987      4 0.0124
# 10 2-year   low            1 corn   0.0411     1 1     
# # ... with 30 more rows

实际上,在这种情况下我们并不需要split-apply-combine策略。以下代码只使用dplyr中的函数,就能得到相同的输出。

# dplyr-only version: a grouped mutate() already works group by group, so no
# explicit split is needed.
dat2 <- blk_sum %>%
  group_by(Rotation, Herbicide, Cohort, Crop) %>%
  select(Dens, Event) %>%
  mutate(result = Dens / first(Dens))
dat2
# # A tibble: 40 x 7
# # Groups:   Rotation, Herbicide, Cohort, Crop [13]
# Rotation Herbicide Cohort Crop       Dens Event result
#   <chr>    <chr>      <dbl> <chr>     <dbl> <dbl>  <dbl>
# 1 2-year   conv           1 corn    79.3        1 1     
# 2 2-year   conv           2 corn     0.0411     1 1     
# 3 2-year   conv           3 corn     0.110      1 1     
# 4 2-year   conv           1 corn     2.84       2 0.0358
# 5 2-year   conv           2 corn     0.0822     2 2     
# 6 2-year   conv           3 corn     0          2 0     
# 7 2-year   conv           1 corn     1.36       3 0.0171
# 8 2-year   conv           2 corn     0          3 0     
# 9 2-year   conv           1 corn     0.987      4 0.0124
# 10 2-year   conv           1 soybean  1.64       1 1     
# # ... with 30 more rows

以下代码显示输出相同,尽管行顺序可能不同。

# Compare the two results with dplyr's set-equality check.
dplyr::setequal(dat, dat2)
# [1] TRUE