如何在R的循环中以符号方式求解方程?

时间:2017-01-07 09:01:43

标签: r

我需要对1000个数据集进行高斯极大似然分类,每个数据集由两类二元高斯分布各100个数据点组成。以下是创建数据集的代码:

library(MASS)

# Population parameters of the two classes: mean vectors ...
mean1 <- c(70, 130)
mean2 <- c(148, 160)
# ... and 2 x 2 covariance matrices (entries given row by row).
cov1 <- matrix(c(784, -546, -546, 900), nrow = 2, ncol = 2, byrow = TRUE)
cov2 <- matrix(c(484, 285.1, 285.1, 324), nrow = 2, ncol = 2, byrow = TRUE)

# Simulation layout: nrs data sets per class, ss observations per data set,
# d coordinates per observation.
nrs <- 1000
ss <- 100
d <- length(mean1)

# Fix the RNG state so the simulated data sets are reproducible.
set.seed(1)

# Reference data: for each class, a list of nrs matrices (ss rows, d columns)
# drawn from the class's bivariate normal distribution. All class-1 samples
# are drawn before any class-2 sample.
refdata_1 <- lapply(seq_len(nrs), function(i) {
  matrix(mvrnorm(ss, mu = mean1, Sigma = cov1), nrow = ss, ncol = d)
})
refdata_2 <- lapply(seq_len(nrs), function(i) {
  matrix(mvrnorm(ss, mu = mean2, Sigma = cov2), nrow = ss, ncol = d)
})

# Sample mean vector of every simulated data set.
mean_refdata_1 <- lapply(refdata_1, function(sample_mat) colMeans(sample_mat))
mean_refdata_2 <- lapply(refdata_2, function(sample_mat) colMeans(sample_mat))

# Sample covariance matrix of every simulated data set.
cov_refdata_1 <- lapply(refdata_1, function(sample_mat) cov(sample_mat))
cov_refdata_2 <- lapply(refdata_2, function(sample_mat) cov(sample_mat))

现在,我需要为1000个数据集中的每一个(因此1000个决策边界)绘制两个类之间的决策边界。

我先用Sage(而不是R)针对原始的均值和协方差做了符号求解,步骤如下:

这是数据点的向量:

# Symbolic coordinates of a data point, collected in a vector over the
# symbolic ring SR.
a, b = var('a,b')
x = vector(SR, [a, b])

这是决策方程(如果你想知道ln p(类)部分在哪里,两个类都有相同的概率,从而相互抵消):

# Gaussian log-discriminant of a class with mean mu and covariance sigma,
# evaluated at the symbolic point x (the prior term is left out).
def discriminant(mu, sigma):
    offset = x - mu
    return -0.5*log(det(sigma)) - 0.5*(offset*sigma.inverse()*offset)

decision1 = discriminant(mean1, cov1)
decision2 = discriminant(mean2, cov2)

如果decision1(data point) > decision2(data point),则该数据点属于第1类。要得到决策边界,令decision1 - decision2 == 0。数据点来自RGB图像,因此数据向量x中a的取值范围为0到255。我对b求解该方程:

# Decision boundary: points where both discriminants are equal, solved for b.
boundary = (decision1 - decision2 == 0)
solve(boundary, b)

在R中,针对原始数据集(原始的均值和协方差),代码如下:

library(rSymPy)

# Numeric parameters of the two original classes, kept on the R side.
m_1 <- c(70, 130)
m_2 <- c(148, 160)
covma_1 <- matrix(c(784, -546, -546, 900), nrow = 2, ncol = 2, byrow = TRUE)
covma_2 <- matrix(c(484, 285.1, 285.1, 324), nrow = 2, ncol = 2, byrow = TRUE)

# --- Class 1 -----------------------------------------------------------------
# Declare placeholder symbols and build the symbolic covariance matrix, the
# data point x = (a, b) and the mean vector inside the SymPy session.
c11 <- Var("c11")
c12 <- Var("c12")
c13 <- Var("c13")
c14 <- Var("c14")
sympy("covma_1 = Matrix([[c11,c12], [c13,c14]])")
a <- Var("a")
b <- Var("b")
sympy("x = Matrix([a,b])")
m11 <- Var("m11")
m12 <- Var("m12")
sympy("m_1 = Matrix([m11,m12])")

# Substitute the class-1 parameter values for the placeholders.
sympy("covma_1=covma_1.subs(c11,784)")
sympy("covma_1=covma_1.subs(c12,-546)")
sympy("covma_1=covma_1.subs(c13,-546)")
sympy("covma_1=covma_1.subs(c14,900)")
sympy("m_1=m_1 .subs(m11,70)")
sympy("m_1=m_1 .subs(m12,130)")

# The two terms of the class-1 discriminant, returned by sympy() as strings.
first <- sympy("-0.5*log(covma_1.det())")
second <- sympy("-0.5*((x-m_1).T*covma_1.inv()*(x-m_1))")

# The quadratic form is a 1 x 1 matrix; presumably it is printed with
# surrounding square brackets, which are stripped to leave a plain expression.
second <- gsub("\\[|\\]", "", second)

# --- Class 2 -----------------------------------------------------------------
c21 <- Var("c21")
c22 <- Var("c22")
c23 <- Var("c23")
c24 <- Var("c24")
sympy("covma_2 = Matrix([[c21,c22], [c23,c24]])")
m21 <- Var("m21")
m22 <- Var("m22")
sympy("m_2 = Matrix([m21,m22])")

# Substitute the class-2 parameter values for the placeholders.
sympy("covma_2=covma_2.subs(c21,484)")
sympy("covma_2=covma_2.subs(c22,285.1)")
sympy("covma_2=covma_2.subs(c23,285.1)")
sympy("covma_2=covma_2.subs(c24,324)")
sympy("m_2=m_2.subs(m21,148)")
sympy("m_2=m_2.subs(m22,160)")

# The two terms of the class-2 discriminant.
third <- sympy("-0.5*log(covma_2.det())")
fourth <- sympy("-0.5*((x-m_2).T*covma_2.inv()*(x-m_2))")

fourth <- gsub("\\[|\\]", "", fourth)

# Join the two terms of each discriminant with an explicit "+" and
# parenthesise each term. Gluing the strings together without an operator is
# only a valid expression when the second term happens to print with a leading
# minus sign.
class1 <- paste0("(", first, ") + (", second, ")")
class2 <- paste0("(", third, ") + (", fourth, ")")

# Decision boundary: class2 - class1 == 0, solved for b; the result is stored
# in the SymPy session under the name hm.
sympy(paste0("hm=solve((", class2, ") - (", class1, "),b)"))

正如您所看到的,我用了非常难看的字符串操作来把表达式传给sympy解析。无论如何,在对b求解之后,我就卡住了,不知道如何得到数值结果。有人可以告诉我如何以符号方式求解b,并在循环中为1000个数据集分别绘制决策边界吗?我也愿意接受非符号的方法。

感谢任何帮助!

0 个答案:

没有答案