# Arithmetic mean of a vector.
#
# Args:
#   x: a numeric (or logical) vector.
# Returns:
#   A single number, sum(x) / length(x). An empty vector yields NaN (0 / 0)
#   rather than a zero-length result.
sMean <- function(x) {
  # Adding 0 promotes integer/logical input to double before summing, so the
  # total cannot overflow the integer range.
  sum(0 + x) / length(x)
}
# Sample covariance of two equal-length vectors.
#
# Computes sum((x - mean(x)) * (y - mean(y))) / (n - 1), i.e. the
# population covariance E[(x - E[x])(y - E[y])] rescaled by n / (n - 1).
#
# Args:
#   x, y: numeric vectors of the same length n.
# Returns:
#   A single number. NA_real_ when n < 2, because the n - 1 denominator
#   leaves the estimate undefined.
# Stops with an error when x and y differ in length.
covariance <- function(x, y) {
  n <- length(x)
  # Fail loudly instead of silently pairing elements of unequal vectors.
  stopifnot(length(y) == n)
  if (n < 2L) {
    return(NA_real_)
  }
  dev_x <- x - sMean(x)
  dev_y <- y - sMean(y)
  sum(dev_x * dev_y) / (n - 1)
}
# Attempted shortcut for the sample covariance via E[xy] - E[x]E[y].
# NOTE: this is the version the question below asks about; it does NOT agree
# with covariance(). Each mean is scaled by n/(n-1) on its own, so the product
# meanX*meanY carries the factor (n/(n-1))^2 while meanXY carries it only once.
# The identity holds for plain means; the n/(n-1) correction has to be applied
# once, to the final difference.
covariance2 = function(x,y) {
#Identity used: E[(x-E[x])(y-E[y])] = E[xy] - E[x]E[y]
# Each of the three means below is multiplied by n/(n-1), with n = length(x).
meanX = (sMean(x) * length(x)) /(length(x)-1)
# length(x) is used for y as well, which assumes y has the same length as x.
meanY = (sMean(y) * length(x)) /(length(x)-1)
meanXY = (sMean(x*y) * length(x)) /(length(x)-1)
return (meanXY - meanX*meanY)
}
#Output
#> cov(arr,arr2)
#[1] 16.75
#> covariance(arr,arr2)
#[1] 16.75
#> covariance2(arr,arr2) # Why does this function give the wrong output?
#[1] -9.5
为什么 covariance2 的输出是错误的?根据上面注释中的公式 E[(x-E[x])(y-E[y])] = E[xy] - E[x]E[y],两个函数的输出应该相同。
答案 0 :(得分:1)
在 covariance2() 中,计算各个均值时应除以 length(x),而不是 length(x) - 1;n/(n-1) 的修正因子只需在最终结果上乘一次:
# Sample covariance from raw moments: (E[xy] - E[x]E[y]) * n / (n - 1),
# where each expectation is the plain mean over the n = length(x) elements.
covariance2 <- function(x, y) {
  n <- length(x)
  mean_of_products <- sMean(x * y)
  product_of_means <- sMean(x) * sMean(y)
  population_cov <- mean_of_products - product_of_means
  # Apply the n / (n - 1) rescaling once, to the final difference.
  population_cov * n / (n - 1)
}
这背后的数学原理是:对长度为 N 的向量 x,用 $E[x] = \mathrm{sum}(x)/N$ 表示其期望(即均值),则可以证明 $E[xy] - E[x]E[y] = E[(x - E[x])(y - E[y])]$。这是一个一般性结论,由期望算子的线性性质得出。您的 covariance() 函数返回的是该等式右侧乘以 $N/(N-1)$ 的结果(即样本协方差)。因此,covariance2() 中计算的左侧也需要乘以相同的因子。