我有很多数据框,如下所示:
> dput(df)
structure(list(x1 = c(0.5, 0.65, 0.67, 0.6, 0.52, 0.47, 0.42,
0.41, 0.4, 0.38), x2 = c(88.2759008383549, 88.4629768937181,
88.6351264139755, 88.7928788760014, 88.9363150101317, 89.0654802835256,
89.1805310339952, 89.2816679977014, 89.3690643397432, 89.4429508602581
), x3 = c(0.44, 0.8, 8.6, 2.72, 0.06, 0, 0, 0, 0, 0.04)), .Names = c("x1",
"x2", "x3"), row.names = c(NA, -10L), class = "data.frame")
我想对每个data.frame应用可变数量的函数。
以下是3个函数的示例:
library(tiger)
# Thin wrapper: hands both inputs, unchanged, to lagtime() from the tiger
# package loaded above and returns whatever lagtime() returns.
f1 <- function(x1, x2) {
  lagtime(x1, x2)
}
# Mean of the element-wise difference x1 - x2; NA differences are dropped
# before averaging.
f2 <- function(x1, x2) {
  differences <- x1 - x2
  mean(differences, na.rm = TRUE)
}
# Ratio of the total of x2 to the total of x3. NA values are not removed,
# so any NA in either input makes the result NA.
f3 <- function(x2, x3) {
  total_x2 <- sum(x2)
  total_x3 <- sum(x3)
  total_x2 / total_x3
}
# Evaluate each function on its own pair of columns of df and collect the
# three results into one vector.
myVector <- c(
  f1(df[["x1"]], df[["x2"]]),
  f2(df[["x1"]], df[["x2"]]),
  f3(df[["x2"]], df[["x3"]])
)
> myVector
[1] -6.00000 -88.44229 70.25615
但是,如果我有一个包含 n 个函数的列表:
funs <- list(f1, f2, f3, ..., fn)
我想应该使用 lapply(或其他类似的函数),但具体该怎么写?
答案 0 :(得分:1)
重写这些函数,使它们接收一个数据框作为参数,并通过参数指定要使用的列名:
# Data-frame variant of f1: extracts the columns named by nm1 and nm2
# (defaults "x1" and "x2") and passes them to lagtime() from the tiger
# package.
#   df  : data frame (or list) containing the named columns
#   nm1 : name of the first column handed to lagtime()
#   nm2 : name of the second column handed to lagtime()
f1 <- function(df, nm1 = "x1", nm2 = "x2") {
  lagtime(df[[nm1]], df[[nm2]])
}
# Data-frame variant of f2: mean of the element-wise difference between the
# columns named by nm1 and nm2 (defaults "x1" and "x2"), ignoring NA
# differences.
#   df  : data frame (or list) containing the named columns
#   nm1 : name of the column subtracted from
#   nm2 : name of the column being subtracted
f2 <- function(df, nm1 = "x1", nm2 = "x2") {
  mean(df[[nm1]] - df[[nm2]], na.rm = TRUE)
}
# Data-frame variant of f3: sum of the column named by nm2 divided by the
# sum of the column named by nm3 (defaults "x2" and "x3"). NA values are not
# removed.
#   df  : data frame (or list) containing the named columns
#   nm2 : name of the numerator column
#   nm3 : name of the denominator column
f3 <- function(df, nm2 = "x2", nm3 = "x3") {
  sum(df[[nm2]]) / sum(df[[nm3]])
}
然后遍历函数列表和数据框列表:
funs <- list(f1, f2, f3)
# The example data frame is named `df`; it is listed twice only to show that
# several data frames are processed in one go.
dfs <- list(df, df)
# Outer lapply(): one list element per function.
# Inner vapply(): one value per data frame; numeric(1) makes R fail loudly
# if a function ever returns something other than a single number.
lapply(funs, function(f) vapply(dfs, f, numeric(1)))
#------------------
[[1]]
[1] -6 -6
[[2]]
[1] -88.44229 -88.44229
[[3]]
[1] 70.25615 70.25615
答案 1 :(得分:0)
为什么不把它们全部放进一个函数里?在这里,我用 median 代替 lagtime。
返回一个列表:
# Compute all three summaries in one call and return them as a named list
# with elements `median`, `mean` and `quotient`.
#   x, y, z : vectors (numeric columns in the example)
#   ...     : forwarded to mean() only (e.g. na.rm = TRUE); median() and
#             sum() do not receive it
foo <- function(x, y, z, ...) {
  list(
    median = median(c(x, y)),
    mean = mean(x - y, ...),
    quotient = sum(y) / sum(z)
  )
}
# Example call on the three columns of df; na.rm = TRUE is passed through
# foo()'s `...`.
foo(df$x1, df$x2, df$x3, na.rm = TRUE)
# $median
# [1] 44.47295
#
# $mean
# [1] -88.44229
#
# $quotient
# [1] 70.25615
或者返回一个向量:
# Returns the three summaries as a named vector with elements `median`,
# `mean` and `quotient`. Extra arguments in `...` go to mean() only.
foo2 <- function(x, y, z, ...) {
  pooled_median <- median(c(x, y))
  mean_difference <- mean(x - y, ...)
  sum_ratio <- sum(y) / sum(z)
  c(median = pooled_median, mean = mean_difference, quotient = sum_ratio)
}
# Example call: evaluate foo2() with the columns of df looked up by name.
with(df, foo2(x1, x2, x3, na.rm = TRUE))
# median mean quotient
# 44.47295 -88.44229 70.25615