我有 2 个数据框,需要基于 data_1 进行一些计算,在 data_2 中创建一个新列 varD。
varD 的值使用公式 (varA + pi*VarB)/varC 计算。
请注意,data_2 中的 varC 只有 2 个值。对于其中的每一个值,对应的 varD 是一个平均值:先用 data_1 的 5 行分别算出 5 个可能的值,然后取它们的平均值。
# Sample input: five one-second timestamps with two measurements each.
data_1 <- data.frame(
  date = c(
    "2015-01-06 00:01:00",
    "2015-01-06 00:01:01",
    "2015-01-06 00:01:02",
    "2015-01-06 00:01:03",
    "2015-01-06 00:01:04"
  ),
  varA = c(1, 5, 4, 8, 6),
  VarB = c(6, 88, 54, 698, 21)
)

# The two divisors for which an averaged varD is wanted.
data_2 <- data.frame(varC = c(0.5, 1.6))
我已经做了 data_1$newVar <- data_1$varA + pi*data_1$VarB。
如何按照上述条件在我的 data_2 中添加 varD,即 varD 的第一个值是 {(1+pi*6)/0.5 + (5+pi*88)/0.5 + ...}/5?
答案 0 :(得分:2)
抱歉,这个写法有点笨拙,不过应该可以工作。
# Example data from the question.
data_1 <- data.frame(
  date = c(
    "2015-01-06 00:01:00", "2015-01-06 00:01:01", "2015-01-06 00:01:02",
    "2015-01-06 00:01:03", "2015-01-06 00:01:04"
  ),
  varA = c(1, 5, 4, 8, 6),
  VarB = c(6, 88, 54, 698, 21)
)
data_2 <- data.frame(varC = c(0.5, 1.6))

# Numerator of the formula, varA + pi * VarB, one value per row of data_1.
data_1$newVar <- data_1$varA + pi * data_1$VarB

# For each varC, average newVar / varC over every row of data_1. Using mean()
# inside vapply() avoids hard-coding the row count (5) and the number of varC
# values (2), so the code keeps working when either data frame grows.
varD <- vapply(
  data_2$varC,
  function(divisor) mean(data_1$newVar / divisor),
  numeric(1)
)

# Add varD as a column. Assigning with `$<-` keeps data_2 a data frame;
# cbind() on two plain vectors would replace it with a numeric matrix.
data_2$varD <- varD
答案 1 :(得分:1)
你可以这样做。
# Requires data_1$newVar (varA + pi * VarB) to have been computed beforehand.
stopifnot(!is.null(data_1$newVar))

# tmp[i, j] = data_1$newVar[i] / data_2$varC[j]: one row per observation in
# data_1, one column per varC value. outer() sizes the matrix from the data,
# so nothing is hard-coded to 5 rows or 2 columns.
tmp <- outer(data_1$newVar, data_2$varC, "/")

# Average each column to get one varD per varC, then store it in data_2.
varD <- colMeans(tmp)
data_2[, "varD"] <- varD
varC varD
1 0.5 1099.1043
2 1.6 343.4701
答案 2 :(得分:1)
基础 R
# Mean of (x + pi * y) / n for each divisor n in z.
#
# x, y: numeric vectors, combined elementwise as x + pi * y
#       (here the varA and VarB columns).
# z:    numeric vector of divisors (here the varC column).
#
# Returns a plain numeric vector with one mean per element of z. vapply() is
# used instead of lapply() so the result can be stored as an ordinary numeric
# column; a list result would create a list-column instead.
myfun <- function(x, y, z) {
  numerator <- x + pi * y
  vapply(z, function(n) mean(numerator / n), numeric(1))
}
# Compute one averaged value per varC and attach the result to data_2 as varD.
data_2$varD <- with(data_1, myfun(x = varA, y = VarB, z = data_2$varC))
data.table
# Same calculation with data.table: turn data_2 into a data.table, then add
# the varD column using `:=`.
library("data.table")
setDT(data_2)
data_2[, varD := myfun(data_1$varA, data_1$VarB, varC)]
输出:
data_2
# varC varD
#1: 0.5 1099.104
#2: 1.6 343.4701
数据:
# Input data used by the snippets above.
data_1 <- data.frame(
  date = c(
    "2015-01-06 00:01:00", "2015-01-06 00:01:01", "2015-01-06 00:01:02",
    "2015-01-06 00:01:03", "2015-01-06 00:01:04"
  ),
  varA = c(1, 5, 4, 8, 6),
  VarB = c(6, 88, 54, 698, 21)
)
data_2 <- data.frame(varC = c(0.5, 1.6))