我想系统地分析给定的函数,以找出在该函数中调用的其他函数。如果可能,递归。
我在milktrader的博文中发现了这个功能,我可以为包(或命名空间)做类似的事情
listFunctions <- function(
name,
...
){
name.0 <- name
name <- paste("package", ":", name, sep="")
if (!name %in% search()) {
stop(paste("Invalid namespace: '", name.0, "'"))
}
# KEEP AS REFERENCE
# out <- ls(name)
funlist <- lsf.str(name)
out <- head(funlist, n=length(funlist))
return(out)
}
> listFunctions("stats")
[1] "acf" "acf2AR" "add.scope"
[4] "add1" "addmargins" "aggregate"
[7] "aggregate.data.frame" "aggregate.default" "aggregate.ts"
[10] "AIC" "alias" "anova"
....
[499] "xtabs"
然而,我想要一个函数,其中name
将是函数的名称,返回值是在{{1}内调用的函数的字符向量(或列表,如果以递归方式完成) }}
我实际上需要某种基于字符的输出(矢量或列表)。这样做的原因是我正在研究一个通用的包装函数,用于并行化一个“内部函数”,你不必经过一个耗时的试错过程,以便找出其他函数内在的功能取决于。因此,我所使用的函数的输出将直接用于name
和/或snowfall::sfExport()
。
编辑2012-08-08
由于两面派有一些亲密的选票,我明天会检查答案如何与其他问题合并。
答案 0 :(得分:6)
试试这个例子:
library(codetools)
ff <- function(f) {
leaf <- function (e, w) {
r <- try(eval(e), silent = TRUE)
if(!is.null(r) && is.function(r)) ret <<- c(ret, as.character(e))
}
call <- function (e, w) {
walkCode(e[[1]], w)
for (a in as.list(e[-1])) if (!missing(a)) walkCode(a, w)
}
ret <- c()
walkCode(body(f), makeCodeWalker(call = call, leaf = leaf, write = cat))
unique(ret)
}
然后,
> ff(data.frame)
[1] "{" "<-" "if" "&&" "is.null" "row.names" "function" "is.character"
[9] "new" "as.character" "anyDuplicated" "return" "||" "all" "==" "stop"
[17] "gettextf" "warning" "paste" "which" "duplicated" "[" "as.list" "substitute"
[25] "list" "-" "missing" "length" "<" "!" "is.object" "is.integer"
[33] "any" "is.na" "unique" "integer" "structure" "character" "names" "!="
[41] "nzchar" "for" "seq_len" "[[" "is.list" "as.data.frame" ".row_names_info" ">"
[49] "deparse" "substr" "nchar" "attr" "abs" "max" "(" "%%"
[57] "unclass" "seq_along" "is.vector" "is.factor" "rep" "class" "inherits" "break"
[65] "next" "unlist" "make.names" "match" ".set_row_names"
> ff(read.table)
[1] "{" "if" "&&" "missing" "file" "!" "text" "<-" "textConnection"
[10] "on.exit" "close" "is.character" "nzchar" "inherits" "stop" "isOpen" "open" ">"
[19] "readLines" "<" "min" "(" "+" "lines" ".Internal" "quote" "length"
[28] "all" "==" "pushBack" "c" "stdin" "scan" "col" "numeric" "-"
[37] "for" "seq_along" "[" "max" "!=" "warning" "paste0" ":" "make.names"
[46] "names" "is.null" "rep" "match" "any" "<=" "rep.int" "list" "%in%"
[55] "sapply" "do.call" "data" "flush" "[[" "which" "is.logical" "is.numeric" "|"
[64] "gettextf" "&" "is.na" "type.convert" "character" "as.factor" "as.Date" "as.POSIXct" "::"
[73] "methods" "as" "row.names" ".set_row_names" "as.integer" "||" "is.object" "is.integer" "as.character"
[82] "anyDuplicated" "class" "attr"
答案 1 :(得分:5)
必须有更好的方法,但这是我的尝试:
listFunctions <- function(function.name, recursive = FALSE,
checked.functions = NULL){
# Get the function's code:
function.code <- deparse(get(function.name))
# break code up into sections preceding left brackets:
left.brackets <- c(unlist(strsplit(function.code,
split="[[:space:]]*\\(")))
called.functions <- unique(c(unlist(sapply(left.brackets,
function (x) {
# Split up according to anything that can't be in a function name.
# split = not alphanumeric, not '_', and not '.'
words <- c(unlist(strsplit(x, split="[^[:alnum:]_.]")))
last.word <- tail(words, 1)
last.word.is.function <- tryCatch(is.function(get(last.word)),
error=function(e) return(FALSE))
return(last.word[last.word.is.function])
}))))
if (recursive){
# checked.functions: We need to keep track of which functions
# we've checked to avoid infinite loops.
functs.to.check <- called.functions[!(called.functions %in%
checked.functions)]
called.functions <- unique(c(called.functions,
do.call(c, lapply(functs.to.check, function(x) {
listFunctions(x, recursive = T,
checked.functions = c(checked.functions,
called.functions))
}))))
}
return(called.functions)
}
结果:
> listFunctions("listFunctions", recursive = FALSE)
[1] "function" "deparse" "get" "c"
[5] "unlist" "strsplit" "unique" "sapply"
[9] "tail" "tryCatch" "is.function" "return"
[13] "if" "do.call" "lapply" "listFunctions"
> system.time(all.functions <- listFunctions("listFunctions", recursive = TRUE))
user system elapsed
92.31 0.08 93.49
> length(all.functions)
[1] 518
如您所见,递归版本返回了许多函数。这个问题是它返回在进程中调用的每个函数,这显然会随着你的进行而增加。无论如何,我希望你能用它(或修改它)来满足你的需要。
答案 2 :(得分:1)
此答案基于Edward和Kohske的答案。我将不考虑最终接受的答案,其主要目的只是为其他用户记录另一种/扩展方法和一些基准。
由Edward提供。
listFunctions_inner <- function(
name,
do.recursive=FALSE,
.do.verbose=FALSE,
.buffer=new.env()
){
..name <- "listFunctions_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
name.0 <- name
if (tryCatch(is.function(get(name)), error=function(e) FALSE)) {
# PROCESS FUNCTIONS
if (.do.verbose) {
message(paste(..name, " // processing function: '", name, "'", sep=""))
}
# Get the function's code:
code <- deparse(get(name))
# break code up into sections preceding left brackets:
left.brackets <- c(unlist(strsplit(code, split="[[:space:]]*\\(")))
out <- sort(unique(unlist(lapply(left.brackets, function (x) {
# Split up according to anything that can't be in a function name.
# split = not alphanumeric, not '_', and not '.'
words <- c(unlist(strsplit(x, split="[^[:alnum:]_.]")))
last.word <- tail(words, 1)
last.word.is.function <- tryCatch(is.function(get(last.word)),
error=function(e) return(FALSE))
out <- last.word[last.word.is.function]
return(out)
}))))
if (do.recursive){
# funs.checked: We need to keep track of which functions
# we've checked to avoid infinite loops.
.buffer$funs.checked <- c(.buffer$funs.checked, name)
funs.next <- out[!(out %in% .buffer$funs.checked)]
if (length(funs.next)) {
out <- sort(unique(unlist(c(out, do.call(c,
lapply(funs.next, function(x) {
if (x == ".Primitive") {
return(NULL)
}
listFunctions_inner(
name=x,
do.recursive=TRUE,
.buffer=.buffer
)
})
)))))
}
}
out <- sort(unique(unlist(out)))
} else {
# PROCESS NAMESPACES
if (.do.verbose) {
message(paste(..name, " // processing namespace: '", name, "'", sep=""))
}
name <- paste("package", ":", name, sep="")
if (!name %in% search()) {
stop(paste(..name, " // invalid namespace: '", name.0, "'"))
}
# KEEP AS REFERENCE
# out <- ls(name)
funlist <- lsf.str(name)
out <- head(funlist, n=length(funlist))
}
out
}
由Kohske提供
listFunctions2_inner <- function(
name,
do.recursive=FALSE,
.do.verbose=FALSE,
.buffer=new.env()
) {
..name <- "listFunctions2_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
name.0 <- name
if (tryCatch(is.function(get(name)), error=function(e) FALSE)) {
# PROCESS FUNCTIONS
leaf <- function (e, w) {
r <- try(eval(e), silent = TRUE)
if(!is.null(r) && is.function(r)) out <<- c(out, as.character(e))
}
call <- function (e, w) {
walkCode(e[[1]], w)
for (a in as.list(e[-1])) if (!missing(a)) walkCode(a, w)
}
out <- c()
walkCode(
body(name),
makeCodeWalker(call=call, leaf=leaf, write=cat)
)
if (do.recursive){
# funs.checked: We need to keep track of which functions
# we've checked to avoid infinite loops.
.buffer$funs.checked <- c(.buffer$funs.checked, name)
funs.next <- out[!(out %in% .buffer$funs.checked)]
if (length(funs.next)) {
out <- sort(unique(unlist(c(out, do.call(c,
lapply(funs.next, function(x) {
if (x == ".Primitive") {
return(NULL)
}
listFunctions_inner(
name=x,
do.recursive=TRUE,
.buffer=.buffer
)
})
)))))
}
}
out <- sort(unique(out))
} else {
# PROCESS NAMESPACES
if (.do.verbose) {
message(paste(..name, " // processing namespace: '", name, "'", sep=""))
}
name <- paste("package", ":", name, sep="")
if (!name %in% search()) {
stop(paste(..name, " // invalid namespace: '", name.0, "'"))
}
# KEEP AS REFERENCE
# out <- ls(name)
funlist <- lsf.str(name)
out <- head(funlist, n=length(funlist))
}
}
这个包装器让你选择使用的实际内部函数,并允许指定应该或不应该考虑的命名空间。这对我的用例很重要(请参阅上面的动机部分),因为我通常只对尚未移动到包中的“自己的”函数(在.GlobalEnv
中)感兴趣。
listFunctions <- function(
name,
ns,
innerFunction=listFunctions,
do.inverse=FALSE,
do.table=FALSE,
do.recursive=FALSE,
.do.verbose=FALSE
){
..name <- "listFunctions_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
out <- innerFunction(name, do.recursive=do.recursive,
.do.verbose=.do.verbose)
if (do.table) {
x.ns <- sapply(out, function(x) {
out <- environmentName(environment(get(x)))
if (out == "") {
out <- ".Primitive"
}
out
})
if (!missing(ns)) {
if (!do.inverse) {
idx <- which(x.ns %in% ns)
} else {
idx <- which(!x.ns %in% ns)
}
if (!length(idx)) {
return(NULL)
}
out <- out[idx]
x.ns <- x.ns[idx]
}
out <- data.frame(name=out, ns=x.ns, stringsAsFactors=FALSE)
rownames(out) <- NULL
}
out
}
# Character vector
listFunctions("install.packages")
# Data Frame (table)
> listFunctions("install.packages", do.table=TRUE)
name ns
1 .libPaths .Primitive
2 .standard_regexps base
3 any .Primitive
4 available.packages utils
...
84 winDialog utils
# Consider 'base' only
> listFunctions("install.packages", ns="base", do.table=TRUE)
name ns
1 .standard_regexps base
2 basename base
3 capabilities base
...
56 warning base
# Consider all except 'base'
> listFunctions("install.packages", ns="base", do.inverse=TRUE, do.table=TRUE)
name ns
1 .libPaths .Primitive
2 any .Primitive
3 available.packages utils
...
28 winDialog utils
# Recursively, no table
listFunctions("install.packages", do.recursive=TRUE)
# Recursively table
listFunctions("install.packages", do.table=TRUE, do.recursive=TRUE)
name ns
1 .amatch_bounds base
2 .amatch_costs base
3 .C .Primitive
...
544 xzfile base
# List functions inside a namespace
listFunctions("utils")
listFunctions("utils", do.table=TRUE)
> bench <- microbenchmark(listFunctions("install.packages"))
bench
> Unit: milliseconds
expr min lq median uq
1 listFunctions("install.packages") 152.9654 157.2805 160.5019 165.4688
max
1 244.6589
> bench <- microbenchmark(listFunctions("install.packages", do.recursive=TRUE), times=3)
bench
> Unit: seconds
expr min lq
1 listFunctions("install.packages", do.recursive = TRUE) 6.272732 6.30164
median uq max
1 6.330547 6.438158 6.545769
> bench <- microbenchmark(listFunctions("install.packages",
+ innerFunction=listFunctions2_inner))
bench
> Unit: milliseconds
expr
1 listFunctions("install.packages", innerFunction = listFunctions2_inner)
min lq median uq max
1 207.0299 212.3286 222.6448 324.6399 445.4154
> bench <- microbenchmark(listFunctions("install.packages",
+ innerFunction=listFunctions2_inner, do.recursive=TRUE), times=3)
bench
Warning message:
In nm[nm == ""] <- exprnm[nm == ""] :
number of items to replace is not a multiple of replacement length
> Unit: seconds
expr
1 listFunctions("install.packages", innerFunction = listFunctions2_inner,
min lq median uq max
1 7.673281 8.065561 8.457841 8.558259 8.658678