我有一个多元数据集,我想按因子 $Crop 的每个水平分别应用 mutate 函数。我想到的最好的代码如下,但是遇到了错误。我该如何解决?预先感谢。
# Failing attempt: split() hands a list to the next step, and mutate() has
# no method for a list (see the error message below).
blk_sum%>%
group_by(Rotation, Herbicide, Cohort, Crop)%>%
select(Dens,Event)%>%
split(.$Crop)%>%
mutate(.~Dens/first(Dens))
Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "list"
> dput(blk_sum[1:40,])
structure(list(Rotation = c("2-year", "2-year", "2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year",
"3-year", "3-year", "3-year", "3-year", "3-year", "3-year", "3-year",
"3-year"), Herbicide = c("conv", "conv", "conv", "conv", "conv",
"conv", "conv", "conv", "conv", "conv", "conv", "conv", "conv",
"conv", "conv", "conv", "conv", "conv", "conv", "conv", "conv",
"conv", "conv", "low", "low", "low", "low", "low", "low", "low",
"low", "low", "conv", "conv", "conv", "conv", "conv", "conv",
"conv", "conv"), Crop = c("corn", "corn", "corn", "corn", "corn",
"corn", "corn", "corn", "corn", "soybean", "soybean", "soybean",
"soybean", "soybean", "soybean", "soybean", "soybean", "soybean",
"soybean", "soybean", "soybean", "soybean", "soybean", "corn",
"corn", "corn", "corn", "corn", "corn", "corn", "corn", "corn",
"corn", "corn", "corn", "corn", "corn", "corn", "corn", "corn"
), Event = c(1, 1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 1, 1, 2, 2, 2,
2, 3, 3, 3, 4, 4, 5, 1, 1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 1, 2, 2,
2, 3, 3), Cohort = c(1, 2, 3, 1, 2, 3, 1, 2, 1, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 1, 2, 3, 1, 2, 1, 1, 2, 3,
1, 2, 3, 1, 2), mean = c(482, 0.25, 0.666666666666667, 17.25,
0.5, 0, 8.25, 0, 6, 10, 2.75, 20.375, 1.375, 5, 1.625, 15.25,
0.833333333333333, 2.875, 1.5, 15, 2.125, 2.16666666666667, 1.66666666666667,
0.25, 76.5, 2, 0, 9, 1.33333333333333, 0.5, 5.75, 0.5, 89.75,
0.5, 0, 1.5, 0.5, 0, 0, 0.25), sd = c(539.175914397766, 0.5,
1.15470053837925, 23.4858113194612, 1, 0, 15.1739909054935, 0,
11.3431330181157, 9.68061391205567, 4.23421438150827, 19.5224230053546,
1.68501801601221, 4.20883424647321, 3.42000417710689, 13.4456578227216,
1.16904519445001, 2.23207142742853, 2.72554057547699, 12.4257796536073,
1.35620268186054, 3.06050104830347, 1.50554530541816, 0.5, 17.3301279087413,
2, 0, 4.24264068711928, 1.15470053837925, 1, 4.3493294502333,
1, 38.8705115308079, 1, 0, 2.38047614284762, 1, 0, 0, 0.5), Dens = c(79.2763157894737,
0.0411184210526316, 0.109649122807018, 2.83717105263158, 0.0822368421052632,
0, 1.35690789473684, 0, 0.986842105263158, 1.64473684210526,
0.452302631578947, 3.35115131578947, 0.226151315789474, 0.822368421052632,
0.267269736842105, 2.50822368421053, 0.137061403508772, 0.472861842105263,
0.246710526315789, 2.46710526315789, 0.349506578947368, 0.356359649122807,
0.274122807017544, 0.0411184210526316, 12.5822368421053, 0.328947368421053,
0, 1.48026315789474, 0.219298245614035, 0.0822368421052632, 0.945723684210526,
0.0822368421052632, 14.7615131578947, 0.0822368421052632, 0,
0.246710526315789, 0.0822368421052632, 0, 0, 0.0411184210526316
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-40L), groups = structure(list(Rotation = c("2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "2-year", "2-year", "2-year",
"2-year", "2-year", "2-year", "2-year", "3-year", "3-year", "3-year"
), Herbicide = c("conv", "conv", "conv", "conv", "conv", "conv",
"conv", "conv", "conv", "low", "low", "low", "low", "conv", "conv",
"conv"), Crop = c("corn", "corn", "corn", "corn", "soybean",
"soybean", "soybean", "soybean", "soybean", "corn", "corn", "corn",
"corn", "corn", "corn", "corn"), Event = c(1, 2, 3, 4, 1, 2,
3, 4, 5, 1, 2, 3, 4, 1, 2, 3), .rows = list(1:3, 4:6, 7:8, 9L,
10:13, 14:17, 18:20, 21:22, 23L, 24:26, 27:29, 30:31, 32L,
33:35, 36:38, 39:40)), row.names = c(NA, -16L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
答案 0 :(得分:1)
这能满足您的需求吗?
# Split the data by crop, then within each piece compute every group's
# density relative to the first row of that group, and finally stack the
# pieces back into a single data frame.
# Note: the data frame in the question is named `blk_sum`.
splitted <- blk_sum %>%
  split(.$Crop)
lapply(splitted, function(df) {
  df %>%
    group_by(Rotation, Herbicide, Cohort, Crop) %>%
    mutate(result = Dens / first(Dens))
}) %>%
  bind_rows()
答案 1 :(得分:1)
我们可以使用 purrr 包中的 map_dfr。总体上,下面的方法与 @pythonjokeun 的解决方案相同,但是 map_dfr 更容易放进管道中。
library(dplyr)
library(purrr)
# Group the data, keep Dens and Event (the grouping columns come along),
# split by crop, and let map_dfr() apply the grouped mutate() to each piece
# and row-bind the results.
dat <- blk_sum %>%
  group_by(Rotation, Herbicide, Cohort, Crop) %>%
  select(Dens, Event) %>%
  split(.$Crop) %>%
  map_dfr(function(piece) mutate(piece, result = Dens / first(Dens)))
dat
# # A tibble: 40 x 7
# # Groups: Rotation, Herbicide, Cohort, Crop [13]
# Rotation Herbicide Cohort Crop Dens Event result
# <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
# 1 2-year conv 1 corn 79.3 1 1
# 2 2-year conv 2 corn 0.0411 1 1
# 3 2-year conv 3 corn 0.110 1 1
# 4 2-year conv 1 corn 2.84 2 0.0358
# 5 2-year conv 2 corn 0.0822 2 2
# 6 2-year conv 3 corn 0 2 0
# 7 2-year conv 1 corn 1.36 3 0.0171
# 8 2-year conv 2 corn 0 3 0
# 9 2-year conv 1 corn 0.987 4 0.0124
# 10 2-year low 1 corn 0.0411 1 1
# # ... with 30 more rows
实际上,在这种情况下,我们其实并不需要 split-apply-combine 策略。以下代码仅使用 dplyr 中的函数,并生成相同的输出。
# Pure dplyr version: mutate() on a grouped data frame already works group
# by group, so no explicit split is required.
dat2 <- blk_sum %>%
  group_by(Rotation, Herbicide, Cohort, Crop) %>%
  mutate(result = Dens / first(Dens)) %>%
  select(Dens, Event, result)
dat2
# # A tibble: 40 x 7
# # Groups: Rotation, Herbicide, Cohort, Crop [13]
# Rotation Herbicide Cohort Crop Dens Event result
# <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
# 1 2-year conv 1 corn 79.3 1 1
# 2 2-year conv 2 corn 0.0411 1 1
# 3 2-year conv 3 corn 0.110 1 1
# 4 2-year conv 1 corn 2.84 2 0.0358
# 5 2-year conv 2 corn 0.0822 2 2
# 6 2-year conv 3 corn 0 2 0
# 7 2-year conv 1 corn 1.36 3 0.0171
# 8 2-year conv 2 corn 0 3 0
# 9 2-year conv 1 corn 0.987 4 0.0124
# 10 2-year conv 1 soybean 1.64 1 1
# # ... with 30 more rows
以下代码显示输出相同,尽管行顺序可能不同。
# Check that both approaches yield the same rows, regardless of row order
# (assumes dplyr's data-frame method for setequal() is in effect).
setequal(dat, dat2)
# [1] TRUE