我对此问题的任何帮助都将不胜感激。我是一名中等程度的高级程序员,但到目前为止我所有的解决方案都让我失望了。我从我想要做的事情背后的逻辑开始,接着是我的尝试,然后是测试用例。我试图尽可能明确。
我应该提一下,我知道问题是什么,但我不知道解决方案是什么。
# sqldf has some limitations:
cpaste <- function(x) paste(x, collapse = ", ")
dd <- data.frame(a = 1:10)
b <- 5:8
# this is what I want to get
sqldf("select * from dd where a in (5, 6, 7, 8)")
# but I want to get it by typing this
sqldf(sprintf("select * from %s where a in (%s)", dd, b)) # error
# and it doesn't work, because this is what sprintf expects:
sqldf(sprintf("select * from %s where a in (%s)", "dd", paste(b, collapse = ", ")))
# in other words,
# (1) the name of data frame, not the data frame itself, and
# (2) the vector must be turned into a single string with comma separated values
# I wrote a wrapper function for sqldf
# it uses sprintf to create the sql string that I need to feed to sqldf
# but before doing that it does (1) and (2) as mentioned above
# so I can do this and it would work:
run_sql("select * from %s where a in (%s)", dd, b)
# it works until I try running it inside another funciton
# where I start running into some problems
# here's the function, followed by test cases
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(eval(x))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(eval(x))) {
cpaste(eval(x)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(eval(x))) cpaste(eval(x)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode)
} else {
zcode
}
}
dd <- data.frame(a = 1:10)
b <- 5:8
run_sql("select * from %s where a > %s", dd, 5)
run_sql("select * from %s where a in (%s)", dd, b)
# it works when the function uses variables in .GlobalEnv
# but this is not the preferred way:
foo <- function() {
run_sql("select * from %s where a in (%s)", dd, b)
}
foo()
# here's the preferred way
# but things stop working:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
# here's one solution to the above, but I am hoping there's a better way
foo <- function(x, y) {
do.call(run_sql, list("select * from %s where a in (%s)",
substitute(x),
substitute(y)))
}
foo(dd, b)
# also, the above solution does not work with local variables
foo <- function() {
bb <- dd
do.call(run_sql, list("select * from %s where a in (%s)",
bb,
substitute(y)))
}
foo()
答案 0 :(得分:2)
您正在寻找的功能已存在于gsubfn包中,该包由sqldf自动提取。有关执行此操作的方法,请参阅Example 5上的sqldf home page,并查看vignette中的gsubfn package,了解有关fn
的更多信息。就目前的例子而言:
dd <- data.frame(a = 1:10)
ddname <- "dd"
b <- 5
fn$sqldf("select * from $ddname where a > $b")
b <- 5:8
fn$sqldf("select * from $ddname where a in (`toString(b)`)")
答案 1 :(得分:2)
@G。对于这篇文章的许多读者来说,格洛腾迪克的解决方案可能会更容易。也就是说,我认为您可以通过识别调用run_sql
的父环境来修复您的函数,然后使用envir=
在您调用依赖于环境的函数时指定该环境 - 具体而言,{ {1}}和eval()
。像这样:
sqldf()
这适用于使用x和y的测试用例:
cpaste <- function(x) paste(x, collapse = ", ")
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(eval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(eval(x, envir=envir))) {
cpaste(eval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(eval(x, envir=envir))) cpaste(eval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
并且,通过一些调整,在使用foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
和局部变量的测试用例中:
do.call
要了解原始函数的问题,并了解每次调用foo <- function(y) {
bb <- dd
do.call(run_sql, list("select * from %s where a in (%s)",
as.name("bb"),
substitute(y),
envir=environment()))
}
foo(b)
时可见的环境,我编写了一个名为eval()
的函数来包装多个enveval
函数和sys.xxx
电话。然后,回到eval()
函数,我通过调用run_sql
替换了对eval
的所有来电。
enveval
使用测试用例可以显示每次调用时# enveval: Replace an eval() call with enveval() to see a description of the stack of environments experienced by eval()
enveval <- function(x, envir=parent.frame()) {
cat(paste0("EVALUATING ",as.character(x),":\n"))
stack <- data.frame(frame_num=1:sys.nframe(), call=strtrim(as.character(sys.calls()),15),
is_eval_envir=NA, vars_in_frame=NA, x_exists=NA, eval_x=NA)
for(i in 1:nrow(stack)) {
f <- which(i==stack$frame_num)
stack[f,"is_eval_envir"] <- identical(envir,sys.frame(f))
stack[f,"vars_in_frame"] <- paste(ls(envir=sys.frame(f)),collapse=",")
stack[f,"x_exists"] <- exists(as.character(x), where=sys.frame(f))
if(stack[f,"is_eval_envir"] & stack[f,"x_exists"]) {
# if all the variables to evaluate are single-element atomic, you can also run the following line:
if(is.atomic(eval(x, envir=sys.frame(f)))) {
stack[f,"eval_x"] <- cpaste(eval(x, envir=sys.frame(f)))
} else {
stack[f,"eval_x"] <- "[non-atomic]"
}
}
}
print(stack)
eval(x, envir=envir)
}
# The new run_sql where eval is replaced with enveval:
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(enveval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(enveval(x, envir=envir))) {
cpaste(enveval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(enveval(x, envir=envir))) cpaste(enveval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
看到的内容(以及enveval
会看到的内容)。例如,运行第一个测试函数:
eval
给出了以下打印输出,显示foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
调用的框架是每次调用foo(dd, b)
的有用环境:
eval()