将NSE(enquo)与dplyr和tidyr一起使用(complete(nesting(...), ...)的问题)

时间:2018-04-23 11:55:37

标签: r dplyr tidyr rlang tidyeval

我正在尝试将NSE与dplyr和tidyr一起使用。

我有一份产品清单,其中包含每个地区每月的销量(Vol)。产品和区域具有分层结构(ProductLevel1比ProductLevel2更粗,ProductLevel2又比ProductLevel3更粗......)。在制作图表之前,我想使用tidyr::complete和tidyr::nesting补全没有销量的月份。

下面这个函数似乎在tidyr::nesting处失败:

# Question's reproduction case: total `Vol` per calDate / region / product,
# then try to add the missing calDate rows with complete(nesting(...)).
# This is the version that raises the error quoted right after this snippet.
#
# Args:
#   IS1: data frame; the body reads its `calDate` and `Vol` columns.
#   finerProductLevel, finerRegionLevel: columns passed as bare names.
f_test <- function(IS1, finerProductLevel, finerRegionLevel) {  
  # enquo() captures each argument's expression as a quosure.
  qfinerProductLevel <- enquo(finerProductLevel)
  qfinerRegionLevel <- enquo(finerRegionLevel)
  # Sum Vol within each calDate / region / product combination.
  IS2 <- IS1 %>% 
    group_by(calDate, !! qfinerRegionLevel, !! qfinerProductLevel) %>%
    summarize(Vol = sum(Vol, na.rm = TRUE)) %>%
    ungroup()

  # The quosures are unquoted inside nesting(); this is the call the reported
  # error comes from. The assignment is the function's last expression, so the
  # result is returned invisibly.
  IS3 <- IS2 %>%
    complete(nesting(!! qfinerProductLevel, !! qfinerRegionLevel), 
             calDate, fill = list(Vol=0))
}
# Call with bare (unquoted) column names.
f_test(IS1, ProductLevel4, RegionLevel4_18)

我收到的错误是:

Error in eval_tidy(xs[[i]], unique_output) : object 'ProductLevel4' not found 

如果我删除nesting()调用中的!!,则错误变为:

Error: Columns `qfinerProductLevel`, `qfinerRegionLevel` must be 1d atomic vectors or lists 

过去有一个类似的问题没有回答:dplyr programming: unquote-splicing causes overscope error with complete() and nesting()

数据集的示例:

# Sample data set (looks like dput() output): 19 rows with the columns named
# in `.Names` below -- COPASalesOffice, calDate (class "Date"), Vol, the
# RegionLevel* columns and the ProductLevel* columns. The object carries the
# classes "tbl_df", "tbl" and "data.frame".
IS1 <- structure(list(COPASalesOffice = c("DACH", "DACH", "EMO", "EMO", 
"France", "French Africa", "Israel", "Italy", "Middle East", 
"Nordic", "South Hub", "South Hub", "Spain Portugal", "Spain Portugal", 
"Turkey", "Turkey", "Turkey", "UK Ireland", "UK Ireland"), calDate = structure(c(16861, 
17440, 16983, 17410, 16436, 16709, 16161, 16222, 17136, 16526, 
16861, 16922, 16587, 17136, 16495, 16283, 16983, 16222, 16740
), class = "Date"), Vol = c(75.17, 121.731, 0, 23, 0, 15, 30, 
173.36, 0, 9.217, -1, 0, 0.849, 12.154, 0, 0, 25, 1, 20.313), 
    RegionLevel0 = c("EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", 
    "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", 
    "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR"
    ), RegionLevel1 = c("Europe", "Europe", "MEAR", "MEAR", "Europe", 
    "MEAR", "MEAR", "Europe", "MEAR", "Europe", "Europe", "Europe", 
    "Europe", "Europe", "Europe", "Europe", "Europe", "Europe", 
    "Europe"), RegionLevel2 = c("EU5-NB", "EU5-NB", "CIS", "CIS", 
    "EU5-NB", "MEA", "MEA", "EU5-NB", "MEA", "EU5-NB", "CEE", 
    "CEE", "EU5-NB", "EU5-NB", "CEE", "CEE", "CEE", "EU5-NB", 
    "EU5-NB"), RegionLevel4_18 = c("R01", "R01", "R20", "R20", 
    "R03", "R14", "R17", "R04", "R12", "R06", "R08", "R08", "R05", 
    "R05", "R09", "R09", "R09", "R02", "R02"), ProductLevel1 = c("aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous"
    ), ProductLevel2 = c("aq8", "aq8", "aq8", "aq8", "aq8", "aq4", 
    "aq4", "aq4", "aq8", "aq8", "aq8", "aq8", "aq8", "aq8", "aq8", 
    "aq8", "aq4", "aq8", "aq8"), ProductLevel3 = c("8.24-44", 
    "8.24-44", "8.24-44", "8.24-44", "8.24-44", "4.24-44", "4.24-44", 
    "4.24-44", "8.A2", "8.24-44", "64in", "8.A2", "8.24-44", 
    "64in", "8.24-44", "8.24-44", "4.24-44", "8.24-44", "8.24-44"
    ), ProductLevel4 = c("T636-596-642, H&H", "T804, SC-P6/7/8/9000", 
    "T636-596-642, H&H", "T636-596-642, H&H", "T40X, 9000", "T692-693-694-698 Opr", 
    "T611-612, 74x0-94x0", "T611-612, 74x0-94x0", "T850, SCP800", 
    "T543-544, 40-76-9600", "T591, 11880", "T605-606, 48x0", 
    "T543-544, 40-76-9600", "T591, 11880", "T46X, 7000", "T602-603, 78x0-98x0", 
    "T692-693-694-698 Opr", "T47X, 9500", "T543-544, 40-76-9600"
    )), row.names = c(NA, -19L), class = c("tbl_df", "tbl", "data.frame"
), .Names = c("COPASalesOffice", "calDate", "Vol", "RegionLevel0", 
"RegionLevel1", "RegionLevel2", "RegionLevel4_18", "ProductLevel1", 
"ProductLevel2", "ProductLevel3", "ProductLevel4"))

和SessionInfo:

Rstudio 1.1.442

R version 3.4.4 (2018-03-15)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
Matrix products: default

locale:
[1] LC_COLLATE=French_France.1252  LC_CTYPE=French_France.1252    LC_MONETARY=French_France.1252 LC_NUMERIC=C                   LC_TIME=French_France.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base  

other attached packages:
[1] forcats_0.3.0   stringr_1.3.0   dplyr_0.7.4     purrr_0.2.4     readr_1.1.1     tidyr_0.8.0     tibble_1.4.2    ggplot2_2.2.1   tidyverse_1.2.1

1 个答案:

答案 0 :(得分:0)

在对f_test()函数的调用中,您传递的第二个和第三个参数是不存在的对象的名称。您可能想给它们加上引号,以便将它们作为字符串传递。此外,您可能希望f_test()函数返回某些内容......我猜return(IS3)返回的正是您期望的结果。

# Sample data set: 19 rows with the columns named in `.Names` below --
# COPASalesOffice, calDate (class "Date"), Vol, the RegionLevel* columns and
# the ProductLevel* columns.
# Every string literal is kept on a single line: a line break inside a literal
# becomes part of the value, which would turn "T692-693-694-698 Opr" in row 6
# into a different product than the same label in row 17.
IS1 <- structure(list(COPASalesOffice = c("DACH", "DACH", "EMO", "EMO", 
"France", "French Africa", "Israel", "Italy", "Middle East", 
"Nordic", "South Hub", "South Hub", "Spain Portugal", "Spain Portugal", 
"Turkey", "Turkey", "Turkey", "UK Ireland", "UK Ireland"), calDate = structure(c(16861, 
17440, 16983, 17410, 16436, 16709, 16161, 16222, 17136, 16526, 
16861, 16922, 16587, 17136, 16495, 16283, 16983, 16222, 16740
), class = "Date"), Vol = c(75.17, 121.731, 0, 23, 0, 15, 30, 
173.36, 0, 9.217, -1, 0, 0.849, 12.154, 0, 0, 25, 1, 20.313), 
    RegionLevel0 = c("EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", 
    "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", 
    "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR", "EMEAR"
    ), RegionLevel1 = c("Europe", "Europe", "MEAR", "MEAR", "Europe", 
    "MEAR", "MEAR", "Europe", "MEAR", "Europe", "Europe", "Europe", 
    "Europe", "Europe", "Europe", "Europe", "Europe", "Europe", 
    "Europe"), RegionLevel2 = c("EU5-NB", "EU5-NB", "CIS", "CIS", 
    "EU5-NB", "MEA", "MEA", "EU5-NB", "MEA", "EU5-NB", "CEE", 
    "CEE", "EU5-NB", "EU5-NB", "CEE", "CEE", "CEE", "EU5-NB", 
    "EU5-NB"), RegionLevel4_18 = c("R01", "R01", "R20", "R20", 
    "R03", "R14", "R17", "R04", "R12", "R06", "R08", "R08", "R05", 
    "R05", "R09", "R09", "R09", "R02", "R02"), ProductLevel1 = c("aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", 
    "aqueous", "aqueous", "aqueous", "aqueous", "aqueous", "aqueous"
    ), ProductLevel2 = c("aq8", "aq8", "aq8", "aq8", "aq8", "aq4", 
    "aq4", "aq4", "aq8", "aq8", "aq8", "aq8", "aq8", "aq8", "aq8", 
    "aq8", "aq4", "aq8", "aq8"), ProductLevel3 = c("8.24-44", 
    "8.24-44", "8.24-44", "8.24-44", "8.24-44", "4.24-44", "4.24-44", 
    "4.24-44", "8.A2", "8.24-44", "64in", "8.A2", "8.24-44", 
    "64in", "8.24-44", "8.24-44", "4.24-44", "8.24-44", "8.24-44"
    ), ProductLevel4 = c("T636-596-642, H&H", "T804, SC-P6/7/8/9000", 
    "T636-596-642, H&H", "T636-596-642, H&H", "T40X, 9000", "T692-693-694-698 Opr", 
    "T611-612, 74x0-94x0", "T611-612, 74x0-94x0", "T850, SCP800", 
    "T543-544, 40-76-9600", "T591, 11880", "T605-606, 48x0", 
    "T543-544, 40-76-9600", "T591, 11880", "T46X, 7000", "T602-603, 78x0-98x0", 
    "T692-693-694-698 Opr", "T47X, 9500", "T543-544, 40-76-9600"
    )), row.names = c(NA, -19L), class = c("tbl_df", "tbl", "data.frame"
), .Names = c("COPASalesOffice", "calDate", "Vol", "RegionLevel0", 
"RegionLevel1", "RegionLevel2", "RegionLevel4_18", "ProductLevel1", 
"ProductLevel2", "ProductLevel3", "ProductLevel4"))

library(dplyr)
library(tidyr)

# Total `Vol` per calDate / region / product, then add the calDate values that
# are missing for each observed product-region pair, with Vol filled as 0.
#
# Args:
#   IS1: data frame with `calDate`, `Vol` and the two level columns.
#   finerProductLevel, finerRegionLevel: the level columns, given as strings
#     (as in the call below) or as bare names.
# Returns:
#   The completed data frame.
f_test <- function(IS1, finerProductLevel, finerRegionLevel) {
  # ensym() turns a string (or a bare name) into a symbol that refers to the
  # column. enquo() would keep a string argument as a constant, so group_by()
  # and nesting() would operate on that literal value instead of the column.
  symProductLevel <- rlang::ensym(finerProductLevel)
  symRegionLevel <- rlang::ensym(finerRegionLevel)

  IS2 <- IS1 %>%
    group_by(calDate, !!symRegionLevel, !!symProductLevel) %>%
    summarize(Vol = sum(Vol, na.rm = TRUE)) %>%
    ungroup()

  # nesting() restricts the expansion to product-region pairs present in the
  # data; each such pair is crossed with every calDate.
  IS3 <- IS2 %>%
    complete(nesting(!!symProductLevel, !!symRegionLevel),
             calDate, fill = list(Vol = 0))

  return(IS3)
}
f_test(IS1, "ProductLevel4", "RegionLevel4_18")

根据评论进行更新

如果你试图复制这样的结果......

# Reference result with the column names hard-coded: total Vol per
# calDate / product / region, then add the calDate values missing for each
# observed product-region pair, with Vol filled as 0.
IS1 %>%
  group_by(calDate, ProductLevel4, RegionLevel4_18) %>%
  summarise(Vol = sum(Vol, na.rm = TRUE)) %>%
  ungroup() %>%
  complete(
    nesting(ProductLevel4, RegionLevel4_18),
    calDate,
    fill = list(Vol = 0)
  )

并使用一个接受“裸变量名”(而不是字符串)的函数,像这样......

# Target call style: the level columns are passed as bare names, not strings.
f_test(IS1, ProductLevel4, RegionLevel4_18)

然后你就可以这样写......

# Total `Vol` per calDate and two grouping columns, then add the calDate
# values missing for each observed (grp1, grp2) pair, with Vol filled as 0.
#
# Args:
#   df: data frame with `calDate`, `Vol` and the two grouping columns.
#   grp1, grp2: grouping columns, captured with rlang::ensym().
# Returns:
#   The completed data frame.
f_test <- function(df, grp1, grp2) {
  first_col <- rlang::ensym(grp1)
  second_col <- rlang::ensym(grp2)

  totals <- df %>%
    group_by(calDate, !!first_col, !!second_col) %>%
    summarise(Vol = sum(Vol, na.rm = TRUE)) %>%
    ungroup()

  # Only pairs that occur in the data are expanded across calDate.
  complete(
    totals,
    nesting(!!first_col, !!second_col),
    calDate,
    fill = list(Vol = 0)
  )
}

f_test(IS1, ProductLevel4, RegionLevel4_18)

进一步更新

您还可以使用rlang::ensyms()一次处理两个(或所有)变量,然后用!!!将它们解引用并拼接(unquote-splice)......

# Same aggregation-and-completion as above, but both grouping columns are
# captured in one rlang::ensyms() call and spliced in with `!!!`.
#
# Args:
#   data: data frame with `calDate`, `Vol` and the two grouping columns.
#   var1, var2: grouping columns.
# Returns:
#   The completed data frame.
test <- function(data, var1, var2) {
  level_cols <- rlang::ensyms(var1, var2)

  totals <- data %>%
    group_by(calDate, !!!level_cols) %>%
    summarise(Vol = sum(Vol, na.rm = TRUE)) %>%
    ungroup()

  complete(
    totals,
    nesting(!!!level_cols),
    calDate,
    fill = list(Vol = 0)
  )
}
test(IS1, ProductLevel4, RegionLevel4_18)

然后允许您使用...接受任意数量的参数......

# Variadic version: every column passed through `...` is captured with
# rlang::ensyms() and spliced into both group_by() and nesting().
#
# Args:
#   data: data frame with `calDate`, `Vol` and the grouping columns.
#   ...: grouping columns.
# Returns:
#   The completed data frame.
test <- function(data, ...) {
  level_cols <- rlang::ensyms(...)

  totals <- data %>%
    group_by(calDate, !!!level_cols) %>%
    summarise(Vol = sum(Vol, na.rm = TRUE)) %>%
    ungroup()

  complete(
    totals,
    nesting(!!!level_cols),
    calDate,
    fill = list(Vol = 0)
  )
}
test(IS1, ProductLevel4, RegionLevel4_18)
test(IS1, ProductLevel4)
test(IS1, ProductLevel4, RegionLevel4_18, ProductLevel2)