我需要创建一个使用 dplyr 执行数据库调用的包装函数。
首先创建一个可重现的示例:
library("DBI")
library("dplyr")
# Open an in-memory SQLite database. RSQLite's dbConnect() method names the
# database argument `dbname`; the original `path = ":memory:"` was not a
# recognised argument, so the in-memory request was not honoured.
conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# One row per (day, country) pair; `indate` is stored as a character timestamp
# in the session's local time zone.
df <- expand.grid(
  indate = as.character(
    as.POSIXct(seq(as.Date("2017/06/06"), as.Date("2018/02/12"), by = "day"))
  ),
  name = c("Canada", "Japan", "USA"),
  stringsAsFactors = FALSE
)

# Copy the data into a permanent table, indexed on both columns.
copy_to(conn, df, "lineups_country",
  temporary = FALSE,
  indexes = list("indate", "name")
)
这是没有包装函数的代码:
# Reference the remote table by name. The original passed `table`, which is not
# defined as a variable here and therefore resolves to the base R function
# `table()` instead of a table name.
res <- tbl(conn, "lineups_country")
# filter the country
res <- res %>% filter(name %in% c("Canada", "Japan"))
# filter the date
res <- res %>% filter(indate >= "2018-01-01")
# Print the SQL that will be sent to the database, then fetch the rows.
res %>% show_query()
df2 <- res %>% collect()
# Compare the full data with the filtered result.
unique(df$name)
unique(df2$name)
min(df$indate)
min(df2$indate)
现在要创建包装函数,我已阅读文档https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html
然而,有些地方我还不是很清楚,特别是关于引用(quoting)/取消引用(unquoting)的部分。
这是我试过的:
# Attempted wrapper: this is the version that raises the error quoted below.
# It opens `table` on `conn`, filters on the column named by `names(key)` and
# on `dateCol >= startDate`, prints the generated SQL after each step with
# show_query(), and finally collects the result.
# Note that `conn` is disconnected when the function exits (see on.exit()).
myFun = function(conn, table,
dateCol = "indate",
startDate = as.POSIXct("2018-01-01"),
key = list(name = c("Australia","Japan"))) {
on.exit({dbDisconnect(conn)})
res = tbl(conn, table)
res %>% show_query()
# filter the country
countryCol = names(key)
# NOTE(review): `countryCol` holds a string, so enquo() presumably captures
# that string rather than a column reference; the commented-out rlang::sym()
# alternative looks like what is needed here -- confirm.
enquo_country <- enquo(countryCol) #enquo_country <- rlang::sym(countryCol) #
# Only the first element of `key` is used for the values to keep.
res = res %>% filter(!!enquo_country %in% key[[1]])
res %>% show_query()
# filter the date
# NOTE(review): `dateCol` defaults to the string "indate"; as above, enquo()
# on a string presumably does not yield a column reference -- confirm.
enquo_dateCol <- enquo(dateCol) #enquo_country <- rlang::sym(names(key)) #
res = res %>% filter(!!enquo_dateCol >= as.character(startDate))
res %>% show_query()
return(res %>% collect())
}
给出错误:
match(x, table, nomatch = 0L) 中的错误:'match' 需要向量参数(原文:Error in match(x, table, nomatch = 0L) : 'match' requires vector arguments)
答案 0 :(得分:1)
你需要改变几处:
- 需要把表名传给 `table` 参数(`table` 本身是一个函数);
- 使用 `sym` 将 `names(key)` 返回的字符向量转换成符号;
- 如果使用 `enquo`,请不要给 `dateCol` 加引号。如果要给它加引号,请改用 `sym`;
- 把 `startDate` 转换为字符没有特别的意义;无论如何它都能被正确处理。
library("DBI")
library("dplyr")
# Open an in-memory SQLite database. RSQLite's dbConnect() method names the
# database argument `dbname`; the original `path = ":memory:"` was not a
# recognised argument, so the in-memory request was not honoured.
conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# One row per (day, country) pair; `indate` is stored as a character timestamp
# in the session's local time zone.
df <- expand.grid(
  indate = as.character(
    as.POSIXct(seq(as.Date("2017/06/06"), as.Date("2018/02/12"), by = "day"))
  ),
  name = c("Canada", "Japan", "USA"),
  stringsAsFactors = FALSE
)

# Copy the data into a permanent table, indexed on both columns.
copy_to(conn, df, "lineups_country",
  temporary = FALSE,
  indexes = list("indate", "name")
)
#' Filter a database table by key values and a start date, then collect it.
#'
#' The SQL generated so far is printed with `show_query()` after opening the
#' table and after every filter step.
#'
#' @param conn A DBI connection. It is disconnected when the function exits.
#' @param table Name of the remote table, passed on to `tbl()`.
#' @param dateCol Bare (unquoted) name of the date column; it is captured with
#'   `enquo()`.
#' @param startDate Inclusive lower bound compared against `dateCol`.
#' @param key Named list. Each name is a column of `table` and each element
#'   holds the values to keep for that column.
#' @return The filtered rows, as returned by `collect()`.
myFun <- function(conn, table,
                  dateCol = indate,
                  startDate = as.POSIXct("2018-01-01"),
                  key = list(name = c("Australia", "Japan"))) {
  on.exit(dbDisconnect(conn), add = TRUE)

  # `dateCol` arrives as a bare column name, so capture it with `enquo`.
  date_col <- enquo(dateCol)

  if (!is.list(key) || is.null(names(key)) || !all(nzchar(names(key)))) {
    stop("`key` must be a named list of column values.", call. = FALSE)
  }

  res <- tbl(conn, table)
  res %>% show_query()

  # filter the country (one filter per key column)
  for (i in seq_along(key)) {
    key_col <- sym(names(key)[[i]]) # use `sym` here: the name is a string
    key_vals <- key[[i]]
    # `!!key_vals` inlines the local values so they cannot be mistaken for a
    # column of the remote table.
    res <- res %>% filter((!!key_col) %in% (!!key_vals))
    res %>% show_query()
  }

  # filter the date
  res <- res %>% filter((!!date_col) >= (!!startDate))
  res %>% show_query()

  res %>% collect()
}
现在:
# Run the wrapper on the example table. `dateCol` is passed as a bare name
# (not quoted) because this version of myFun captures it with `enquo`.
df2 <- myFun(
  conn,
  table = "lineups_country",
  dateCol = indate,
  startDate = as.POSIXct("2018-01-01"),
  key = list(name = c("Canada", "Japan"))
)
#> <SQL>
#> SELECT *
#> FROM `lineups_country`
#> <SQL>
#> SELECT *
#> FROM `lineups_country`
#> WHERE (`name` IN ('Canada', 'Japan'))
#> <SQL>
#> SELECT *
#> FROM (SELECT *
#> FROM `lineups_country`
#> WHERE (`name` IN ('Canada', 'Japan')))
#> WHERE (`indate` >= '2018-01-01T05:00:00Z')
df2
#> # A tibble: 82 x 2
#> indate name
#> <chr> <chr>
#> 1 2018-01-02 19:00:00 Canada
#> 2 2018-01-02 19:00:00 Japan
#> 3 2018-01-03 19:00:00 Canada
#> 4 2018-01-03 19:00:00 Japan
#> 5 2018-01-04 19:00:00 Canada
#> 6 2018-01-04 19:00:00 Japan
#> 7 2018-01-05 19:00:00 Canada
#> 8 2018-01-05 19:00:00 Japan
#> 9 2018-01-06 19:00:00 Canada
#> 10 2018-01-06 19:00:00 Japan
#> # ... with 72 more rows
答案 1 :(得分:0)
`key` 的 `names` 和 `dateCol` 都是字符输入,请使用 `rlang` 中的 `sym` 将它们转换为符号后再求值
#' Filter a database table by key values and a start date, then collect it.
#'
#' The SQL generated so far is printed with `show_query()` after opening the
#' table and after every filter step.
#'
#' @param conn A DBI connection. It is disconnected when the function exits.
#' @param table Name of the remote table, passed on to `tbl()`.
#' @param dateCol Name of the date column, given as a string; it is converted
#'   to a symbol with `rlang::sym()`.
#' @param startDate Inclusive lower bound compared against `dateCol`.
#' @param key Named list. Each name is a column of `table` and each element
#'   holds the values to keep for that column.
#' @return The filtered rows, as returned by `collect()`.
myFun <- function(conn, table,
                  dateCol = "indate",
                  startDate = as.POSIXct("2018-01-01"),
                  key = list(name = c("Australia", "Japan"))) {
  on.exit(dbDisconnect(conn), add = TRUE)

  if (!is.list(key) || is.null(names(key)) || !all(nzchar(names(key)))) {
    stop("`key` must be a named list of column values.", call. = FALSE)
  }

  res <- tbl(conn, table)
  res %>%
    show_query()

  # filter the country (one filter per key column)
  for (i in seq_along(key)) {
    key_col <- rlang::sym(names(key)[[i]])
    key_vals <- key[[i]]
    # `!!key_vals` inlines the local values so they cannot be mistaken for a
    # column of the remote table.
    res <- res %>%
      filter((!!key_col) %in% (!!key_vals))
    res %>%
      show_query()
  }

  # filter the date; keep the `dateCol` argument intact and use a new local
  # for the symbol built from it
  date_col <- rlang::sym(dateCol)
  res <- res %>%
    filter((!!date_col) >= (!!startDate))
  res %>%
    show_query()

  res %>%
    collect()
}
- 运行函数
# Run the wrapper on the example table. `dateCol` is given as a string because
# this version of myFun converts it with `rlang::sym`.
df2 <- myFun(
  conn,
  table = "lineups_country",
  dateCol = "indate",
  startDate = as.POSIXct("2018-01-01"),
  key = list(name = c("Canada", "Japan"))
)
#<SQL>
#SELECT *
#FROM `lineups_country`
#<SQL>
#SELECT *
#FROM `lineups_country`
#WHERE (`name` IN ('Canada', 'Japan'))
#<SQL>
#SELECT *
#FROM (SELECT *
#FROM `lineups_country`
#WHERE (`name` IN ('Canada', 'Japan')))
#WHERE (`indate` >= '2017-12-31T18:30:00Z')
head(df2, 5)
# A tibble: 5 x 2
# indate name
# <chr> <chr>
#1 2018-01-01 05:30:00 Canada
#2 2018-01-01 05:30:00 Japan
#3 2018-01-02 05:30:00 Canada
#4 2018-01-02 05:30:00 Japan
#5 2018-01-03 05:30:00 Canada