我想用一段更简洁的代码来替换多个单独的 mapply 语句。最终我用了 3 层嵌套的 mapply 语句,但这样写似乎有些绕。我有其他语言的经验,但刚接触 R,所以想了解更符合 R 思维方式的写法。如果 3 层嵌套的语句确实是最好的办法,我可以接受,但仍希望听听大家的意见。如果您有更好的方法来构建这样的嵌套(子集)输出,也非常欢迎指出。
# Simulated payment records: 100 rows with an integer amount (5-15), a tip
# rounded to cents (0-2), one logical flag column per employee (A-F), and a
# random date in January 2016. No seed is set, so values differ between runs.
payments <- data.frame(
  Amount = sample(5:15, size = 100, replace = TRUE),
  Tip.Amount = round(runif(100, min = 0, max = 2), digits = 2),
  A = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  B = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  C = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  D = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  E = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  F = sample(c(TRUE, FALSE), size = 100, replace = TRUE),
  Date = sample(
    seq(from = as.Date("2016-01-01"), to = as.Date("2016-01-31"), by = "day"),
    size = 100,
    replace = TRUE
  )
)
# Names of the logical employee-flag columns in `payments`.
employees <- c("A", "B", "C", "D", "E", "F")
# Grouping keys (every employee flag plus Date) as symbols.
dots <- lapply(c(employees, "Date"), as.symbol)
# Aggregate payments per (employee-flag combination, Date): number of
# payments, total amount, number of tips of at least 0.01, and total tips.
# NOTE: relies on dplyr being attached (%>%, filter, group_by, summarise, n,
# ungroup, arrange); it is not loaded anywhere in this snippet.
payments.by_date_employee <- payments %>%
  filter(!is.na(Date), !is.na(Amount)) %>%
  # group_by_(.dots = ...) is deprecated; splice the symbols into group_by().
  group_by(!!!dots) %>%
  summarise(
    Payment.Count = n(),
    Amount = sum(Amount),
    # Tip.Count must come before Tip.Amount is overwritten by its sum below.
    Tip.Count = sum(Tip.Amount >= 0.01, na.rm = TRUE),
    Tip.Amount = sum(Tip.Amount, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  arrange(Date)
#long/manual way--------------------------------------------------------------------------------
# For every employee flag, compare the rows where the flag is TRUE against the
# rows where it is FALSE with a two-sample t.test() and keep only the p-value.
# Result: t[[type]][[variable]] is a list of p-values named by employee.
# "amount" entries use the raw `payments` rows; "count" entries use the
# per-date aggregates in `payments.by_date_employee`.
t <- list(
  payments = list(
    amount = Map(function(name) {
      rows_true <- which(payments[[name]] == TRUE)
      rows_false <- which(payments[[name]] == FALSE)
      t.test(payments$Amount[rows_true], payments$Amount[rows_false])$p.value
    }, employees),
    count = Map(function(name) {
      rows_true <- which(payments.by_date_employee[[name]] == TRUE)
      rows_false <- which(payments.by_date_employee[[name]] == FALSE)
      t.test(
        payments.by_date_employee$Amount[rows_true],
        payments.by_date_employee$Amount[rows_false]
      )$p.value
    }, employees)
  ),
  tips = list(
    amount = Map(function(name) {
      rows_true <- which(payments[[name]] == TRUE)
      rows_false <- which(payments[[name]] == FALSE)
      t.test(payments$Tip.Amount[rows_true], payments$Tip.Amount[rows_false])$p.value
    }, employees),
    count = Map(function(name) {
      rows_true <- which(payments.by_date_employee[[name]] == TRUE)
      rows_false <- which(payments.by_date_employee[[name]] == FALSE)
      t.test(
        payments.by_date_employee$Tip.Amount[rows_true],
        payments.by_date_employee$Tip.Amount[rows_false]
      )$p.value
    }, employees)
  )
)
#long/manual way--------------------------------------------------------------------------------
#attempt at single mapply statement ------------------------------------------------------------
# Tries to compute every p-value with one mapply() call by passing the employee
# name, result type, result variable, data-frame symbol and column name side by
# side.
# NOTE(review): mapply() walks its vector arguments in parallel (recycling the
# shorter ones); it does not iterate over their cross product. The vectors
# below have lengths 6, 4, 2, 4 and 4, so they do not line up into the
# 6 x 4 = 24 combinations the manual version computes.
# NOTE(review): `type` and `variable` are accepted by the function but never
# used in its body.
y <- mapply(function(name,type,variable,df,nm) list({
# eval(df) resolves the quoted data-frame symbol; rows are split on the
# employee flag and column `nm` of each half is passed to t.test().
t.test(subset(eval(df),eval(df)[[name]]==TRUE)[[nm]],
subset(eval(df),eval(df)[[name]]==FALSE)[[nm]])$p.value}),
employees,
c("payments","payments","tips","tips"),
c("amount","count"),
# quote() keeps each data frame as a symbol so it is looked up only when
# eval(df) runs inside the function.
c(quote(payments),quote(payments),quote(payments.by_date_employee),quote(payments.by_date_employee)),
c("Amount","Amount","Tip.Amount","Tip.Amount"),
SIMPLIFY = FALSE
)
#attempt at single mapply statement ------------------------------------------------------------
#works but seems convoluted --------------------------------------------------------------------
# Three nested mapply() calls: the outer one iterates over the result type,
# the middle one over (variable, data-frame symbol, column name) in parallel,
# and the inner one over the employee flags, computing a t.test() p-value for
# each employee.
z <- mapply(function(type) list({
mapply(function(variable,df,nm) list({
# NOTE(review): this assignment runs inside an anonymous function, so it
# appears to modify a function-local copy of `t`; the values reach the caller
# only through the return value collected in `z` -- verify.
t[[type]][[variable]] <-mapply(function(name) list({
t.test(subset(eval(df),eval(df)[[name]]==TRUE)[[nm]],
subset(eval(df),eval(df)[[name]]==FALSE)[[nm]])$p.value}),
employees)
}),
# `variable` has length 2 while the next two vectors have length 4, so it is
# recycled to amount, count, amount, count.
c("amount","count"),
# NOTE(review): these (data frame, column) pairs do not depend on `type`, so
# both types run the same four combinations -- confirm this is intended.
c(quote(payments),quote(payments),quote(payments.by_date_employee),quote(payments.by_date_employee)),
c("Amount","Amount","Tip.Amount","Tip.Amount"),
SIMPLIFY = FALSE
)
}),
c("payments","tips")
)
#works but seems convoluted --------------------------------------------------------------------
答案 0 :(得分:1)
这是一种将问题分解为几个步骤的方法。首先,编写一个函数,它接收数据框名称、变量名称和员工代码,并返回所需的值:
#' P-value of a two-sample t-test between the two groups of a flag column
#'
#' @param data Name (character string) of a data frame; it is looked up with
#'   `get()`.
#' @param varname Name of the numeric column to compare.
#' @param employee Name of the column that splits the rows into two groups.
#' @return The `p.value` element of the `t.test()` result.
ttest <- function(data, varname, employee) {
  frame <- get(data)
  # split() yields one vector per group; they are passed to t.test() as its
  # `x` and `y` arguments, so exactly two groups are expected.
  groups <- split(frame[[varname]], frame[[employee]])
  names(groups) <- c("x", "y")
  do.call(t.test, groups)$p.value
}
现在,使用 `mapply` 将该函数应用于数据框名称、变量名称和员工代码的向量:
# Run ttest() once per (data frame, column, employee) combination, in four
# consecutive runs of length(employees) calls: payments/Amount,
# payments.by_date_employee/Amount, payments/Tip.Amount,
# payments.by_date_employee/Tip.Amount.
# All three vectors are built to the same full length, so the call does not
# depend on mapply()'s implicit recycling. Previously the first vector was only
# half as long as the result, so the names mapply() takes from it were padded
# with NA for the second half of `out`.
out <- mapply(
  ttest,
  rep(c("payments", "payments.by_date_employee"), each = length(employees), times = 2),
  rep(c("Amount", "Tip.Amount"), each = length(employees) * 2),
  rep(employees, times = 4)
)
现在,我们已经得到了所需的全部值。检查这些值是否与列表 `t` 中的值相同:
# Sanity check: the mapply() results in `out` should agree with the flattened
# manual results in `t` once the names are dropped from both.
all.equal(unname(out), unname(unlist(t)))
# [1] TRUE
其余步骤是组织值。我们可以将它们放入数据框中:
# Long-format table of the results: one row per value in `out`, labelled with
# the result type, the variable and the employee it belongs to. The label
# vectors follow the order in which `out` was computed.
d <- data.frame(
  type = rep(c("payments", "tips"), each = 2 * length(employees)),
  variable = rep(rep(c("amount", "count"), each = length(employees)), times = 2),
  employee = rep(employees, 4),
  value = out
)
# type variable employee value
# 1 payments amount A 0.23278642
# 2 payments amount B 0.77047594
# ...
# 7 payments count A 0.56123674
# 8 payments count B 0.81040604
# ...
# 13 tips amount A 0.92749503
# 14 tips amount B 0.08716570
# ...
# 23 tips count E 0.20672583
# 24 tips count F 0.23505606
如果您希望将结果作为嵌套列表,还需要再做一步:
# Rebuild the nested list (type -> variable -> employee -> value) from the
# long-format data frame `d` by splitting on one column per nesting level.
y <- lapply(split(d, d$type), function(by_type) {
  lapply(split(by_type, by_type$variable), function(by_variable) {
    split(by_variable$value, by_variable$employee)
  })
})
# The rebuilt list should match the manually built list `t`.
all.equal(t, y)
# [1] TRUE
更新:要从 `t.test` 的输出中获取其他值,请先修改我们自定义的 `ttest` 函数:
#' Group means and p-value of a two-sample t-test on a flag column
#'
#' @param data Name (character string) of a data frame; it is looked up with
#'   `get()`.
#' @param varname Name of the numeric column to compare.
#' @param employee Name of the column that splits the rows into two groups.
#' @return A named numeric vector holding the flattened `estimate` and
#'   `p.value` elements of the `t.test()` result.
ttest <- function(data, varname, employee) {
  frame <- get(data)
  # split() yields one vector per group; they are passed to t.test() as its
  # `x` and `y` arguments, so exactly two groups are expected.
  groups <- split(frame[[varname]], frame[[employee]])
  names(groups) <- c("x", "y")
  result <- do.call(t.test, groups)
  # Keep only the requested elements and flatten the list into a vector.
  unlist(result[c("estimate", "p.value")])
}
在这种情况下,我们提取 `estimate` 和 `p.value` 的值(其他值的名称可以通过查看任意 `t.test` 输出得到,例如 `str(t.test(1:3, 4:6))`)。`unlist` 函数将我们取出的值(最初是列表形式)展平为向量。
如上所述运行 `mapply`;现在,`out` 对象是一个矩阵而不是一个向量。假设我们要将这些值插入数据框:
out