Question

我对此问题的任何帮助都将不胜感激。我是一名中等程度的高级程序员，但到目前为止我所有的解决方案都让我失望了。我从我想要做的事情背后的逻辑开始，接着是我的尝试，然后是测试用例。我试图尽可能明确。

我应该提一下，我知道问题是什么，但我不知道解决方案是什么。

# sqldf has some limitations:

cpaste <- function(x) paste(x, collapse = ", ")

dd <- data.frame(a = 1:10)
b <- 5:8

# this is what I want to get
sqldf("select * from dd where a in (5, 6, 7, 8)")

# but I want to get it by typing this
sqldf(sprintf("select * from %s where a in (%s)", dd, b)) # error

# and it doesn't work, because this is what sprintf expects:
sqldf(sprintf("select * from %s where a in (%s)", "dd", paste(b, collapse = ", ")))

# in other words, 
# (1) the name of data frame, not the data frame itself, and
# (2) the vector must be turned into a single string with comma separated values

# I wrote a wrapper function for sqldf
# it uses sprintf to create the sql string that I need to feed to sqldf
# but before doing that it does (1) and (2) as mentioned above
# so I can do this and it would work:
run_sql("select * from %s where a in (%s)", dd, b)

# it works until I try running it inside another funciton
# where I start running into some problems

# here's the function, followed by test cases

run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE) {

  require(sqldf)

  ellipsis <- as.list(match.call(expand.dots = TRUE))
  ellipsis[1] <- NULL
  ellipsis$inline <- NULL
  ellipsis$display <- NULL
  ellipsis$eval <- NULL
  # print(ellipsis)
  # print(lapply(ellipsis, class))

  ffn <- function(x) {
    if (is.name(x)) { # the argument passed is itself a variable
      if (is.data.frame(eval(x))) {
        as.character(x) # returns just the name of the data frame
      } else if (is.atomic(eval(x))) {
        cpaste(eval(x)) # return the atomic vector as comma-sep string
      } else "_____FAIL1_____"
    } else if (is.call(x)) { # the argument passed is a function call, eg 2:4
      if (is.atomic(eval(x))) cpaste(eval(x)) else "_____FAIL2_____"
    } else {
      if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
    }
  }
  ellipsis <- lapply(ellipsis, ffn)

  zcode <- do.call(sprintf, unname(ellipsis))
  if (display == TRUE) cat(paste0(zcode, "\n\n"))

  if (eval == TRUE) {
    sqldf(zcode)
  } else {
    zcode
  }

}

dd <- data.frame(a = 1:10)
b <- 5:8
run_sql("select * from %s where a > %s", dd, 5)
run_sql("select * from %s where a in (%s)", dd, b)

# it works when the function uses variables in .GlobalEnv
# but this is not the preferred way:
foo <- function() {
  run_sql("select * from %s where a in (%s)", dd, b)
}
foo()

# here's the preferred way
# but things stop working:
foo <- function(x, y) {
  run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b) 

# here's one solution to the above, but I am hoping there's a better way
foo <- function(x, y) {
  do.call(run_sql, list("select * from %s where a in (%s)", 
    substitute(x), 
    substitute(y)))
}
foo(dd, b) 

# also, the above solution does not work with local variables
foo <- function() {
  bb <- dd
  do.call(run_sql, list("select * from %s where a in (%s)", 
    bb, 
    substitute(y)))
}
foo()

Answer 1

您正在寻找的功能已存在于gsubfn包中，该包由sqldf自动提取。有关执行此操作的方法，请参阅Example 5上的sqldf home page，并查看vignette中的gsubfn package，了解有关fn的更多信息。就目前的例子而言：

dd <- data.frame(a = 1:10)
ddname <- "dd"

b <- 5
fn$sqldf("select * from $ddname where a > $b")

b <- 5:8
fn$sqldf("select * from $ddname where a in (`toString(b)`)")

Answer 2

@G。对于这篇文章的许多读者来说，格洛腾迪克的解决方案可能会更容易。也就是说，我认为您可以通过识别调用run_sql的父环境来修复您的函数，然后使用envir=在您调用依赖于环境的函数时指定该环境 - 具体而言，{ {1}}和eval()。像这样：

sqldf()

这适用于使用x和y的测试用例：

cpaste <- function(x) paste(x, collapse = ", ")
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {

  require(sqldf)

  ellipsis <- as.list(match.call(expand.dots = TRUE))
  ellipsis[1] <- NULL
  ellipsis$inline <- NULL
  ellipsis$display <- NULL
  ellipsis$eval <- NULL
  # print(ellipsis)
  # print(lapply(ellipsis, class))

  ffn <- function(x) {
    if (is.name(x)) { # the argument passed is itself a variable
      if (is.data.frame(eval(x, envir=envir))) {
        as.character(x) # returns just the name of the data frame
      } else if (is.atomic(eval(x, envir=envir))) {
        cpaste(eval(x, envir=envir)) # return the atomic vector as comma-sep string
      } else "_____FAIL1_____"
    } else if (is.call(x)) { # the argument passed is a function call, eg 2:4
      if (is.atomic(eval(x, envir=envir))) cpaste(eval(x, envir=envir)) else "_____FAIL2_____"
    } else {
      if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
    }
  }
  ellipsis <- lapply(ellipsis, ffn)

  zcode <- do.call(sprintf, unname(ellipsis))
  if (display == TRUE) cat(paste0(zcode, "\n\n"))

  if (eval == TRUE) {
    sqldf(zcode, envir=envir)
  } else {
    zcode
  }

}

并且，通过一些调整，在使用foo <- function(x, y) { run_sql("select * from %s where a in (%s)", x, y) } foo(dd, b)和局部变量的测试用例中：

do.call

要了解原始函数的问题，并了解每次调用foo <- function(y) { bb <- dd do.call(run_sql, list("select * from %s where a in (%s)", as.name("bb"), substitute(y), envir=environment())) } foo(b)时可见的环境，我编写了一个名为eval()的函数来包装多个enveval函数和sys.xxx电话。然后，回到eval()函数，我通过调用run_sql替换了对eval的所有来电。

enveval

使用测试用例可以显示每次调用时# enveval: Replace an eval() call with enveval() to see a description of the stack of environments experienced by eval() enveval <- function(x, envir=parent.frame()) { cat(paste0("EVALUATING ",as.character(x),":\n")) stack <- data.frame(frame_num=1:sys.nframe(), call=strtrim(as.character(sys.calls()),15), is_eval_envir=NA, vars_in_frame=NA, x_exists=NA, eval_x=NA) for(i in 1:nrow(stack)) { f <- which(i==stack$frame_num) stack[f,"is_eval_envir"] <- identical(envir,sys.frame(f)) stack[f,"vars_in_frame"] <- paste(ls(envir=sys.frame(f)),collapse=",") stack[f,"x_exists"] <- exists(as.character(x), where=sys.frame(f)) if(stack[f,"is_eval_envir"] & stack[f,"x_exists"]) { # if all the variables to evaluate are single-element atomic, you can also run the following line: if(is.atomic(eval(x, envir=sys.frame(f)))) { stack[f,"eval_x"] <- cpaste(eval(x, envir=sys.frame(f))) } else { stack[f,"eval_x"] <- "[non-atomic]" } } } print(stack) eval(x, envir=envir) } # The new run_sql where eval is replaced with enveval: run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) { require(sqldf) ellipsis <- as.list(match.call(expand.dots = TRUE)) ellipsis[1] <- NULL ellipsis$inline <- NULL ellipsis$display <- NULL ellipsis$eval <- NULL # print(ellipsis) # print(lapply(ellipsis, class)) ffn <- function(x) { if (is.name(x)) { # the argument passed is itself a variable if (is.data.frame(enveval(x, envir=envir))) { as.character(x) # returns just the name of the data frame } else if (is.atomic(enveval(x, envir=envir))) { cpaste(enveval(x, envir=envir)) # return the atomic vector as comma-sep string } else "_____FAIL1_____" } else if (is.call(x)) { # the argument passed is a function call, eg 2:4 if (is.atomic(enveval(x, envir=envir))) cpaste(enveval(x, envir=envir)) else "_____FAIL2_____" } else { if (is.atomic(x)) cpaste(x) else "_____FAIL3_____" } } ellipsis <- lapply(ellipsis, ffn) zcode <- do.call(sprintf, unname(ellipsis)) if (display == TRUE) cat(paste0(zcode, "\n\n")) if (eval == TRUE) { sqldf(zcode, envir=envir) } else { zcode } }看到的内容（以及enveval会看到的内容）。例如，运行第一个测试函数：

eval

给出了以下打印输出，显示foo <- function(x, y) { run_sql("select * from %s where a in (%s)", x, y) } foo(dd, b)调用的框架是每次调用foo(dd, b)的有用环境：

eval()

R：范围规则让我陷入了浑水

2 个答案: