用 dplyr 编程 - 如何处理引用/取消引用(quote/unquote)

时间:2018-02-12 02:05:38

标签: r dplyr

我需要创建一个使用dplyr执行数据库调用的包装函数。

首先创建一个可重现的示例:

library("DBI")
library("dplyr")
# Open an in-memory SQLite database. RSQLite selects the database through the
# `dbname` argument (the original passed `path`, which is not that argument).
conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# One row per (day, country) combination. `indate` is stored as text because
# the POSIXct timestamps are converted with as.character().
df <- expand.grid(
  indate = as.character(as.POSIXct(seq(as.Date("2017/06/06"), as.Date("2018/02/12"), by = "day"))),
  name = c("Canada", "Japan", "USA"),
  stringsAsFactors = FALSE
)

# Copy the data frame into the database as a permanent, indexed table.
copy_to(
  conn, df, "lineups_country",
  temporary = FALSE,
  indexes = list("indate", "name")
)

这是没有包装函数的代码:

# Lazy reference to the remote table. The table name is given explicitly:
# a bare `table` is never assigned in this script and would resolve to
# base::table(), which is a function, not a table name.
res <- tbl(conn, "lineups_country")

# filter the country
res <- res %>% filter(name %in% c("Canada", "Japan"))

# filter the date
res <- res %>% filter(indate >= "2018-01-01")

# Print the generated SQL, then pull the filtered rows into R.
res %>% show_query()
df2 <- res %>% collect()

# Compare the full local data with the filtered result.
unique(df$name)
unique(df2$name)
min(df$indate)
min(df2$indate)

现在要创建包装函数,我已阅读文档https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html

然而,有些地方我还不是很清楚,特别是关于引用/取消引用(quoting/unquoting)的部分。

这是我试过的:

# Wrapper attempt from the question: opens `table` on `conn`, filters it by the
# values in `key` and by `startDate`, prints the generated SQL after each step,
# and returns the collected rows. This is the version reported to fail with the
# `match()` error quoted after the function.
#
# conn      - DBI connection; it is disconnected when the function exits.
# table     - passed straight to tbl().
# dateCol   - name of the date column, given as a string.
# startDate - lower bound (>=) for the date column; converted with as.character().
# key       - named list; the name is used as the column, the first element as
#             the values to keep.
myFun = function(conn, table, 
                 dateCol   = "indate", 
                 startDate = as.POSIXct("2018-01-01"), 
                 key       = list(name = c("Australia","Japan"))) {


  # Close the caller's connection on exit.
  on.exit({dbDisconnect(conn)})
  res = tbl(conn, table) 

  res %>% show_query()

  # filter the country
  countryCol = names(key)
  # NOTE(review): enquo() is applied to a local character variable here, not to
  # a bare column name supplied by the caller; the commented-out sym() variant
  # on the same line is the alternative the author was considering.
  enquo_country <- enquo(countryCol) #enquo_country <- rlang::sym(countryCol) #
  res = res %>% filter(!!enquo_country %in% key[[1]])

  res %>% show_query()

  # filter the date
  # NOTE(review): `dateCol` defaults to a string ("indate"), so enquo() captures
  # a string rather than a column symbol.
  enquo_dateCol <- enquo(dateCol) #enquo_country <- rlang::sym(names(key)) #
  res = res %>% filter(!!enquo_dateCol >= as.character(startDate))

  res %>% show_query()

  return(res %>% collect())
}

给出错误:

  

Error in match(x, table, nomatch = 0L) : 'match' requires vector arguments(match 出错:'match' 需要向量参数)

2 个答案:

答案 0 :(得分:1)

你需要改变一些事情:

  • 传入一个具体的表名,而不是 table(它是一个函数);
  • 对 names(key) 返回的字符向量调用 sym,将其转换为符号(symbol);
  • 如果要使用 enquo,就不要给 dateCol 加引号;如果要加引号(以字符串传入),请改用 sym;
  • 保持 startDate 的命名前后一致;
  • 没有特别的必要把 startDate 转换为字符;不转换也能正常处理。
library("DBI")
library("dplyr")

# Open an in-memory SQLite database. RSQLite selects the database through the
# `dbname` argument (the original passed `path`, which is not that argument).
conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# One row per (day, country) combination. `indate` is stored as text because
# the POSIXct timestamps are converted with as.character().
df <- expand.grid(
  indate = as.character(as.POSIXct(seq(as.Date("2017/06/06"), as.Date("2018/02/12"), by = "day"))),
  name = c("Canada", "Japan", "USA"),
  stringsAsFactors = FALSE
)

# Copy the data frame into the database as a permanent, indexed table.
copy_to(
  conn, df, "lineups_country",
  temporary = FALSE,
  indexes = list("indate", "name")
)

#' Filter a database table by key values and a start date, then collect it.
#'
#' The generated SQL is printed with show_query() after opening the table,
#' after the key filter(s), and after the date filter.
#'
#' @param conn A DBI connection. It is disconnected when the function exits.
#' @param table Name of the table to query (passed to tbl()).
#' @param dateCol Bare (unquoted) name of the date column; captured with enquo().
#' @param startDate Lower bound for `dateCol` (inclusive, `>=`).
#' @param key Named list. Each name is a column and each element holds the
#'   values to keep in that column (`%in%`). Every entry is applied; an empty
#'   list applies no key filter.
#' @return The filtered rows, as returned by collect().
myFun <- function(conn, table,
                  dateCol   = indate,
                  startDate = as.POSIXct("2018-01-01"),
                  key       = list(name = c("Australia", "Japan"))) {
  # add = TRUE so this handler is appended rather than replacing other handlers.
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  # Capture the bare column name before anything could force the argument.
  date_quo <- enquo(dateCol)

  # Fail early with a readable message instead of an opaque sym() error.
  key_cols <- names(key)
  if (length(key) > 0 && (is.null(key_cols) || !all(nzchar(key_cols)))) {
    stop("`key` must be a named list of the form list(column = values).",
         call. = FALSE)
  }

  res <- tbl(conn, table)
  res %>% show_query()

  # filter on every key column (a single-entry `key` behaves as before)
  for (col in key_cols) {
    col_sym <- sym(col)       # string -> symbol, so it is treated as a column
    keep <- key[[col]]
    # `!!keep` inlines the values so they cannot be mistaken for a column.
    res <- res %>% filter(!!col_sym %in% !!keep)
  }
  res %>% show_query()

  # filter the date
  # NOTE(review): `startDate` is passed through unchanged. In the recorded run
  # that follows this function a POSIXct value is rendered as an ISO-8601 UTC
  # literal ('2018-01-01T05:00:00Z') and compared with a text column, and the
  # result starts at 2018-01-02 19:00:00. That suggests the comparison is
  # textual; confirm whether `startDate` should be formatted like the stored
  # column first.
  res <- res %>% filter(!!date_quo >= startDate)
  res %>% show_query()

  res %>% collect()
}

现在:

# Example run. `dateCol` is given as a bare column name because myFun()
# captures it with enquo(); `table` is the table name as a string.
# The `#>` lines are the output recorded from the original run.
df2 <- myFun(
  conn,
  table     = "lineups_country",
  dateCol   = indate,
  startDate = as.POSIXct("2018-01-01"),
  key       = list(name = c("Canada", "Japan"))
)
#> <SQL>
#> SELECT *
#> FROM `lineups_country`
#> <SQL>
#> SELECT *
#> FROM `lineups_country`
#> WHERE (`name` IN ('Canada', 'Japan'))
#> <SQL>
#> SELECT *
#> FROM (SELECT *
#> FROM `lineups_country`
#> WHERE (`name` IN ('Canada', 'Japan')))
#> WHERE (`indate` >= '2018-01-01T05:00:00Z')

df2
#> # A tibble: 82 x 2
#>    indate              name  
#>    <chr>               <chr> 
#>  1 2018-01-02 19:00:00 Canada
#>  2 2018-01-02 19:00:00 Japan 
#>  3 2018-01-03 19:00:00 Canada
#>  4 2018-01-03 19:00:00 Japan 
#>  5 2018-01-04 19:00:00 Canada
#>  6 2018-01-04 19:00:00 Japan 
#>  7 2018-01-05 19:00:00 Canada
#>  8 2018-01-05 19:00:00 Japan 
#>  9 2018-01-06 19:00:00 Canada
#> 10 2018-01-06 19:00:00 Japan 
#> # ... with 72 more rows

答案 1 :(得分:0)

'key' 的 names 和 'dateCol' 都是字符串输入,使用 rlang 中的 sym 将其转换为符号(symbol)后再求值

#' Filter a database table by key values and a start date, then collect it.
#'
#' The generated SQL is printed with show_query() after opening the table,
#' after the key filter(s), and after the date filter.
#'
#' @param conn A DBI connection. It is disconnected when the function exits.
#' @param table Name of the table to query (passed to tbl()).
#' @param dateCol Name of the date column as a single string; converted to a
#'   symbol with rlang::sym().
#' @param startDate Lower bound for the date column (inclusive, `>=`).
#' @param key Named list. Each name is a column and each element holds the
#'   values to keep in that column (`%in%`). Every entry is applied; an empty
#'   list applies no key filter.
#' @return The filtered rows, as returned by collect().
myFun <- function(conn, table,
                  dateCol   = "indate",
                  startDate = as.POSIXct("2018-01-01"),
                  key       = list(name = c("Australia", "Japan"))) {
  # add = TRUE so this handler is appended rather than replacing other handlers.
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  # Fail early with readable messages instead of opaque sym() errors.
  if (!is.character(dateCol) || length(dateCol) != 1L) {
    stop("`dateCol` must be a single column name given as a string.",
         call. = FALSE)
  }
  key_cols <- names(key)
  if (length(key) > 0 && (is.null(key_cols) || !all(nzchar(key_cols)))) {
    stop("`key` must be a named list of the form list(column = values).",
         call. = FALSE)
  }

  res <- tbl(conn, table)
  res %>% show_query()

  # filter on every key column (a single-entry `key` behaves as before)
  for (col in key_cols) {
    col_sym <- rlang::sym(col)   # string -> symbol, so it is treated as a column
    keep <- key[[col]]
    # `!!keep` inlines the values so they cannot be mistaken for a column.
    res <- res %>% filter(!!col_sym %in% !!keep)
  }
  res %>% show_query()

  # filter the date
  # NOTE(review): `startDate` is passed through unchanged. In the recorded run
  # that follows this function a POSIXct value is rendered as an ISO-8601 UTC
  # literal ('2017-12-31T18:30:00Z') and compared with a text column that holds
  # values such as "2018-01-01 05:30:00". That suggests the comparison is
  # textual; confirm whether `startDate` should be formatted like the stored
  # column first.
  date_sym <- rlang::sym(dateCol)
  res <- res %>% filter(!!date_sym >= startDate)
  res %>% show_query()

  res %>% collect()
}

- 运行函数

# Example run. Here `dateCol` is given as a string because this version of
# myFun() converts it with rlang::sym(); `table` is the table name as a string.
# The `#` lines are the output recorded from the original run.
df2 <- myFun(
  conn,
  table     = "lineups_country",
  dateCol   = "indate",
  startDate = as.POSIXct("2018-01-01"),
  key       = list(name = c("Canada", "Japan"))
)
#<SQL>
#SELECT *
#FROM `lineups_country`
#<SQL>
#SELECT *
#FROM `lineups_country`
#WHERE (`name` IN ('Canada', 'Japan'))
#<SQL>
#SELECT *
#FROM (SELECT *
#FROM `lineups_country`
#WHERE (`name` IN ('Canada', 'Japan')))
#WHERE (`indate` >= '2017-12-31T18:30:00Z')

head(df2, n = 5)
# A tibble: 5 x 2
#  indate              name  
#   <chr>               <chr> 
#1 2018-01-01 05:30:00 Canada
#2 2018-01-01 05:30:00 Japan 
#3 2018-01-02 05:30:00 Canada
#4 2018-01-02 05:30:00 Japan 
#5 2018-01-03 05:30:00 Canada