我在管道功能调用中使用dplyr
和group_by
时遇到问题。
可重复示例:
使用以下数据:
# Example data (dput-style literal): a 50-row tibble with character columns
# `word1` and `word2` and integer columns `score`, `n` and `contribution`.
ex_data<- structure(list(word1 = c("no", "not", "not", "no", "not", "not",
"not", "not", "no", "not", "no", "not", "not", "not", "no", "not",
"no", "no", "not", "not", "not", "no", "not", "without", "never",
"no", "not", "no", "no", "not", "not", "not", "no", "no", "no",
"not", "not", "without", "never", "no", "not", "not", "not",
"not", "not", "never", "no", "no", "not", "not"), word2 = c("doubt",
"like", "help", "no", "want", "wish", "allow", "care", "harm",
"sorry", "great", "leave", "pretend", "worth", "pleasure", "love",
"danger", "want", "afraid", "doubt", "fail", "good", "forget",
"feeling", "forget", "matter", "avoid", "chance", "hope", "forgotten",
"miss", "perfectly", "bad", "better", "opportunity", "admit",
"fair", "delay", "failed", "wish", "dislike", "distress", "refuse",
"regret", "trust", "want", "evil", "greater", "better", "blame"
), score = c(-1L, 2L, 2L, -1L, 1L, 1L, 1L, 2L, -2L, -1L, 3L,
-1L, -1L, 2L, 3L, 3L, -2L, 1L, -2L, -1L, -2L, 3L, -1L, 1L, -1L,
1L, -1L, 2L, 2L, -1L, -2L, 3L, -3L, 2L, 2L, -1L, 2L, -1L, -2L,
1L, -2L, -2L, -2L, -2L, 1L, 1L, -3L, 3L, 2L, -2L), n = c(102L,
99L, 82L, 60L, 45L, 39L, 36L, 23L, 22L, 21L, 19L, 18L, 18L, 17L,
16L, 16L, 15L, 15L, 15L, 14L, 14L, 13L, 13L, 13L, 12L, 12L, 12L,
11L, 11L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L), contribution = c(-102L,
198L, 164L, -60L, 45L, 39L, 36L, 46L, -44L, -21L, 57L, -18L,
-18L, 34L, 48L, 48L, -30L, 15L, -30L, -14L, -28L, 39L, -13L,
13L, -12L, 12L, -12L, 22L, 22L, -10L, -20L, 30L, -27L, 18L, 18L,
-9L, 18L, -9L, -16L, 8L, -16L, -16L, -16L, -16L, 8L, 7L, -21L,
21L, 14L, -14L)), .Names = c("word1", "word2", "score", "n",
"contribution"), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
常规的典型管道操作按预期工作:
# Reference pipeline, run directly on `ex_data`: within each `word1`, keep the
# 10 rows with the largest absolute `contribution`, sort the kept rows by
# descending `contribution`, then relabel `word2` as a "word2__word1" factor
# whose levels are the reverse of the sorted row order.
outside_result <- ex_data %>%
  mutate(word2 = reorder(word2, contribution)) %>%
  group_by(word1) %>%
  top_n(n = 10, wt = abs(contribution)) %>%
  group_by(word1, word2) %>%
  arrange(desc(contribution)) %>%
  ungroup() %>%
  mutate(
    word2 = factor(
      paste(word2, word1, sep = "__"),
      levels = rev(paste(word2, word1, sep = "__"))
    )
  )
我已将上述功能实现为以下功能:
# First attempt at wrapping the pipeline above in a reusable function.
# NOTE: this version does not work. The dplyr verbs receive the bare symbols
# `facetPanel`, `barCategory` and `value`; the error reported for this
# function ("unknown variable to group by : facetPanel") indicates that
# group_by() looks for a column literally named `facetPanel` in `df` instead
# of using the value passed in the argument.
order_bars <- function(df,facetPanel,barCategory,value){
df %>% mutate(barCategory=reorder(barCategory,value)) %>%
group_by(facetPanel) %>%
top_n(10,abs(value)) %>%
group_by(facetPanel,barCategory) %>%
arrange(desc(value)) %>%
ungroup() %>%
mutate(barCategory = factor(paste(barCategory,facetPanel, sep = "__"),
levels=rev(paste(barCategory,facetPanel,sep="__"))))
}
根据此post的建议,在函数内的mutate操作期间引用data.frame的变量时使用$
表示法。
# Call the function with column vectors extracted via `$` (not column names).
inside_result<-order_bars(ex_data,ex_data$word1,ex_data$word2,ex_data$contribution)
R抛出以下错误:
Error: unknown variable to group by : facetPanel
Called from: resolve_vars(new_groups, tbl_vars(.data))
我怀疑group_by
需要调整以获取命名变量,或者我必须使用.dot
符号来引用列,不过这只是我随口一猜……
答案 0 :(得分:2)
您需要了解如何使用1)dplyr
动词的SE版本,例如group_by_
和mutate_
以及2)神秘的lazyeval::interp
。请仔细阅读vignette("nse")
。
然后我们可以来:
# Standard-evaluation (SE) version of order_bars().
#
# Arguments:
#   df          - data frame to process.
#   facetPanel  - string: name of the facet / grouping column.
#   barCategory - string: name of the bar-category column.
#   value       - string: name of the numeric column used for ranking/sorting.
#
# Returns `df` reduced to the rows whose absolute `value` ranks in the top 10
# within each `facetPanel` group (ties kept, as with top_n()), sorted by
# descending `value`, with a new factor column literally named `barCategory`
# holding "<barCategory>__<facetPanel>" labels; its levels are the reverse of
# the sorted row order.
#
# NOTE: the underscore verbs (mutate_, group_by_, ...) are deprecated in
# current dplyr releases; they are kept here because this version illustrates
# the lazyeval-based approach.
order_bars <- function(df, facetPanel, barCategory, value){
  # Use interp() through its namespace instead of require(lazyeval):
  # require() merely returns FALSE when the package is missing (the failure
  # would surface later as "could not find function"), and it attaches the
  # package to the caller's search path as a side effect.
  interp <- lazyeval::interp
  df %>%
    mutate_(barCategory = interp(~reorder(x, y), x = as.name(barCategory),
                                 y = as.name(value))) %>%
    group_by_(facetPanel) %>%
    # Explicit equivalent of top_n(10, abs(value)); there is no top_n_().
    filter_(interp(~min_rank(desc(abs(x))) <= 10, x = as.name(value))) %>%
    group_by_(facetPanel, barCategory) %>%
    arrange_(interp(~desc(x), x = as.name(value))) %>%
    ungroup() %>%
    mutate_(barCategory = interp(
      ~factor(paste(x, y, sep = "__"), levels = rev(paste(x, y, sep = "__"))),
      x = as.name(barCategory), y = as.name(facetPanel)))
}
# SE version: column names are supplied as strings.
order_bars(ex_data, 'word1', 'word2', 'contribution')
# A tibble: 25 × 6
#    word1    word2 score     n contribution  barCategory
#    <chr>    <chr> <int> <int>        <int>       <fctr>
# 1    not     like     2    99          198    like__not
# 2    not     help     2    82          164    help__not
# 3     no    great     3    19           57    great__no
# 4     no pleasure     3    16           48 pleasure__no
# 5    not     love     3    16           48    love__not
# 6    not     care     2    23           46    care__not
# 7    not     want     1    45           45    want__not
# 8    not     wish     1    39           39    wish__not
# 9     no     good     3    13           39     good__no
# 10   not    allow     1    36           36   allow__not
请注意,我们需要将top_n
替换为filter_
语句,因为不存在top_n_
。查看top_n
的来源,可以明确地构建filter_
语句。
或者,如果您想做得更花哨一些,可以编写order_bars
的NSE版本:
# Non-standard-evaluation (NSE) version of order_bars().
#
# Arguments:
#   df          - data frame to process.
#   facetPanel  - bare (unquoted) name of the facet / grouping column.
#   barCategory - bare name of the bar-category column.
#   value       - bare name of the numeric column used for ranking/sorting.
#
# Returns `df` reduced to the rows whose absolute `value` ranks in the top 10
# within each `facetPanel` group (ties kept), sorted by descending `value`,
# with a new factor column literally named `barCategory` holding
# "<barCategory>__<facetPanel>" labels; its levels are the reverse of the
# sorted row order.
order_bars <- function(df,facetPanel,barCategory,value){
  # Capture the unevaluated argument expressions (column symbols) so they can
  # be spliced into the formulas below.
  facetPanel <- substitute(facetPanel)
  barCategory <- substitute(barCategory)
  value <- substitute(value)
  # Use interp() through its namespace instead of require(lazyeval):
  # require() merely returns FALSE when the package is missing, and it
  # attaches the package to the caller's search path as a side effect.
  interp <- lazyeval::interp
  df %>%
    mutate_(barCategory = interp(~reorder(x, y), x = barCategory, y = value)) %>%
    group_by_(facetPanel) %>%
    # Explicit equivalent of top_n(10, abs(value)); there is no top_n_().
    filter_(interp(~min_rank(desc(abs(x))) <= 10, x = value)) %>%
    group_by_(facetPanel, barCategory) %>%
    arrange_(interp(~desc(x), x = value)) %>%
    ungroup() %>%
    mutate_(barCategory = interp(
      ~factor(paste(x, y, sep = "__"), levels = rev(paste(x, y, sep = "__"))),
      x = barCategory, y = facetPanel))
}
# NSE version: column names are supplied as bare (unquoted) symbols.
order_bars(ex_data, word1, word2, contribution)
理想情况下,您只需完整编写SE版本,然后使用lazyeval
将NSE版本链接到SE版本。我将此作为练习留给读者。
答案 1 :(得分:1)
通过rlang_0.4.0
和dplyr_0.8.2
,我们可以使用tidy evaluation运算符({{...}},即curly-curly),将引用(quote)和解引用(unquote)合并为一个插值步骤。
library(rlang)
library(dplyr)
# Tidy-eval version of order_bars() using the embrace operator {{ }}.
#
# Arguments:
#   df          - data frame to process.
#   facetPanel  - bare (unquoted) name of the facet / grouping column.
#   barCategory - bare name of the bar-category column.
#   value       - bare name of the numeric column used for ranking/sorting.
#
# Returns `df` reduced to the rows whose absolute `value` ranks in the top 10
# within each `facetPanel` group (ties kept), sorted by descending `value`,
# with a new factor column literally named `barCategory` holding
# "<barCategory>__<facetPanel>" labels; its levels are the reverse of the
# sorted row order.
order_barsN <- function(df, facetPanel, barCategory, value) {
  df %>%
    mutate(barCategory = reorder({{ barCategory }}, {{ value }})) %>%
    group_by({{ facetPanel }}) %>%
    # Explicit equivalent of top_n(10, abs(value)).
    filter(min_rank(desc(abs({{ value }}))) <= 10) %>%
    group_by({{ facetPanel }}, {{ barCategory }}) %>%
    arrange(desc({{ value }})) %>%
    ungroup() %>%
    # Base paste() is used rather than stringr::str_c(): stringr is not
    # attached alongside rlang/dplyr, so str_c() would not be found, and
    # paste() matches what the lazyeval-based versions use for the labels.
    mutate(
      barCategory = factor(
        paste({{ barCategory }}, {{ facetPanel }}, sep = "__"),
        levels = rev(paste({{ barCategory }}, {{ facetPanel }}, sep = "__"))
      )
    )
}
# Run the {{ }}-based version with bare column names.
out2 <- order_barsN(ex_data, word1, word2, contribution)
与上一个答案的结果进行对比检查:
# Re-run order_bars() with bare column names (the calling convention of the
# NSE variant) and confirm that both implementations return the same object.
out1 <- order_bars(ex_data, word1, word2, contribution)
identical(out1, out2)
#[1] TRUE