R:范围规则让我陷入了浑水

时间:2014-02-01 04:53:23

标签: r evaluation scoping

我对此问题的任何帮助都将不胜感激。我是一名中等程度的高级程序员,但到目前为止我所有的解决方案都让我失望了。我从我想要做的事情背后的逻辑开始,接着是我的尝试,然后是测试用例。我试图尽可能明确。

我应该提一下,我知道问题是什么,但我不知道解决方案是什么。

# sqldf has some limitations:

cpaste <- function(x) paste(x, collapse = ", ")

dd <- data.frame(a = 1:10)
b <- 5:8

# this is what I want to get
sqldf("select * from dd where a in (5, 6, 7, 8)")

# but I want to get it by typing this
sqldf(sprintf("select * from %s where a in (%s)", dd, b)) # error

# and it doesn't work, because this is what sprintf expects:
sqldf(sprintf("select * from %s where a in (%s)", "dd", paste(b, collapse = ", ")))

# in other words, 
# (1) the name of data frame, not the data frame itself, and
# (2) the vector must be turned into a single string with comma separated values

# I wrote a wrapper function for sqldf
# it uses sprintf to create the sql string that I need to feed to sqldf
# but before doing that it does (1) and (2) as mentioned above
# so I can do this and it would work:
run_sql("select * from %s where a in (%s)", dd, b)

# it works until I try running it inside another funciton
# where I start running into some problems

# here's the function, followed by test cases

run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE) {

  require(sqldf)

  ellipsis <- as.list(match.call(expand.dots = TRUE))
  ellipsis[1] <- NULL
  ellipsis$inline <- NULL
  ellipsis$display <- NULL
  ellipsis$eval <- NULL
  # print(ellipsis)
  # print(lapply(ellipsis, class))

  ffn <- function(x) {
    if (is.name(x)) { # the argument passed is itself a variable
      if (is.data.frame(eval(x))) {
        as.character(x) # returns just the name of the data frame
      } else if (is.atomic(eval(x))) {
        cpaste(eval(x)) # return the atomic vector as comma-sep string
      } else "_____FAIL1_____"
    } else if (is.call(x)) { # the argument passed is a function call, eg 2:4
      if (is.atomic(eval(x))) cpaste(eval(x)) else "_____FAIL2_____"
    } else {
      if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
    }
  }
  ellipsis <- lapply(ellipsis, ffn)

  zcode <- do.call(sprintf, unname(ellipsis))
  if (display == TRUE) cat(paste0(zcode, "\n\n"))

  if (eval == TRUE) {
    sqldf(zcode)
  } else {
    zcode
  }

}

dd <- data.frame(a = 1:10)
b <- 5:8
run_sql("select * from %s where a > %s", dd, 5)
run_sql("select * from %s where a in (%s)", dd, b)

# it works when the function uses variables in .GlobalEnv
# but this is not the preferred way:
foo <- function() {
  run_sql("select * from %s where a in (%s)", dd, b)
}
foo()

# here's the preferred way
# but things stop working:
foo <- function(x, y) {
  run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b) 

# here's one solution to the above, but I am hoping there's a better way
foo <- function(x, y) {
  do.call(run_sql, list("select * from %s where a in (%s)", 
    substitute(x), 
    substitute(y)))
}
foo(dd, b) 

# also, the above solution does not work with local variables
foo <- function() {
  bb <- dd
  do.call(run_sql, list("select * from %s where a in (%s)", 
    bb, 
    substitute(y)))
}
foo()

2 个答案:

答案 0 :(得分:2)

您正在寻找的功能已存在于gsubfn包中,该包由sqldf自动提取。有关执行此操作的方法,请参阅Example 5上的sqldf home page,并查看vignette中的gsubfn package,了解有关fn的更多信息。就目前的例子而言:

dd <- data.frame(a = 1:10)
ddname <- "dd"

b <- 5
fn$sqldf("select * from $ddname where a > $b")

b <- 5:8
fn$sqldf("select * from $ddname where a in (`toString(b)`)")

答案 1 :(得分:2)

@G。对于这篇文章的许多读者来说,格洛腾迪克的解决方案可能会更容易。也就是说,我认为您可以通过识别调用run_sql的父环境来修复您的函数,然后使用envir=在您调用依赖于环境的函数时指定该环境 - 具体而言,{ {1}}和eval()。像这样:

sqldf()

这适用于使用x和y的测试用例:

cpaste <- function(x) paste(x, collapse = ", ")
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {

  require(sqldf)

  ellipsis <- as.list(match.call(expand.dots = TRUE))
  ellipsis[1] <- NULL
  ellipsis$inline <- NULL
  ellipsis$display <- NULL
  ellipsis$eval <- NULL
  # print(ellipsis)
  # print(lapply(ellipsis, class))

  ffn <- function(x) {
    if (is.name(x)) { # the argument passed is itself a variable
      if (is.data.frame(eval(x, envir=envir))) {
        as.character(x) # returns just the name of the data frame
      } else if (is.atomic(eval(x, envir=envir))) {
        cpaste(eval(x, envir=envir)) # return the atomic vector as comma-sep string
      } else "_____FAIL1_____"
    } else if (is.call(x)) { # the argument passed is a function call, eg 2:4
      if (is.atomic(eval(x, envir=envir))) cpaste(eval(x, envir=envir)) else "_____FAIL2_____"
    } else {
      if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
    }
  }
  ellipsis <- lapply(ellipsis, ffn)

  zcode <- do.call(sprintf, unname(ellipsis))
  if (display == TRUE) cat(paste0(zcode, "\n\n"))

  if (eval == TRUE) {
    sqldf(zcode, envir=envir)
  } else {
    zcode
  }

}

并且,通过一些调整,在使用foo <- function(x, y) { run_sql("select * from %s where a in (%s)", x, y) } foo(dd, b) 和局部变量的测试用例中:

do.call

要了解原始函数的问题,并了解每次调用foo <- function(y) { bb <- dd do.call(run_sql, list("select * from %s where a in (%s)", as.name("bb"), substitute(y), envir=environment())) } foo(b) 时可见的环境,我编写了一个名为eval()的函数来包装多个enveval函数和sys.xxx电话。然后,回到eval()函数,我通过调用run_sql替换了对eval的所有来电。

enveval

使用测试用例可以显示每次调用时# enveval: Replace an eval() call with enveval() to see a description of the stack of environments experienced by eval() enveval <- function(x, envir=parent.frame()) { cat(paste0("EVALUATING ",as.character(x),":\n")) stack <- data.frame(frame_num=1:sys.nframe(), call=strtrim(as.character(sys.calls()),15), is_eval_envir=NA, vars_in_frame=NA, x_exists=NA, eval_x=NA) for(i in 1:nrow(stack)) { f <- which(i==stack$frame_num) stack[f,"is_eval_envir"] <- identical(envir,sys.frame(f)) stack[f,"vars_in_frame"] <- paste(ls(envir=sys.frame(f)),collapse=",") stack[f,"x_exists"] <- exists(as.character(x), where=sys.frame(f)) if(stack[f,"is_eval_envir"] & stack[f,"x_exists"]) { # if all the variables to evaluate are single-element atomic, you can also run the following line: if(is.atomic(eval(x, envir=sys.frame(f)))) { stack[f,"eval_x"] <- cpaste(eval(x, envir=sys.frame(f))) } else { stack[f,"eval_x"] <- "[non-atomic]" } } } print(stack) eval(x, envir=envir) } # The new run_sql where eval is replaced with enveval: run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) { require(sqldf) ellipsis <- as.list(match.call(expand.dots = TRUE)) ellipsis[1] <- NULL ellipsis$inline <- NULL ellipsis$display <- NULL ellipsis$eval <- NULL # print(ellipsis) # print(lapply(ellipsis, class)) ffn <- function(x) { if (is.name(x)) { # the argument passed is itself a variable if (is.data.frame(enveval(x, envir=envir))) { as.character(x) # returns just the name of the data frame } else if (is.atomic(enveval(x, envir=envir))) { cpaste(enveval(x, envir=envir)) # return the atomic vector as comma-sep string } else "_____FAIL1_____" } else if (is.call(x)) { # the argument passed is a function call, eg 2:4 if (is.atomic(enveval(x, envir=envir))) cpaste(enveval(x, envir=envir)) else "_____FAIL2_____" } else { if (is.atomic(x)) cpaste(x) else "_____FAIL3_____" } } ellipsis <- lapply(ellipsis, ffn) zcode <- do.call(sprintf, unname(ellipsis)) if (display == TRUE) cat(paste0(zcode, "\n\n")) if (eval == TRUE) { sqldf(zcode, envir=envir) } else { zcode } } 看到的内容(以及enveval会看到的内容)。例如,运行第一个测试函数:

eval

给出了以下打印输出,显示foo <- function(x, y) { run_sql("select * from %s where a in (%s)", x, y) } foo(dd, b) 调用的框架是每次调用foo(dd, b)的有用环境:

eval()