I'm trying to implement a simple EM algorithm. So far it seems to work well, apart from the small problem that the variances rapidly shrink towards zero, with the Gaussian converging onto the mean of the data. (If I don't update the variances, it converges to the mean just fine!)
As far as I can tell, this is because points close to the centre are "weighted" too heavily, which pushes the algorithm to keep reducing the variance until it shrinks to zero. The algorithm behaves much better (apart from a slightly too high variance, which is expected) when I change the constant in the exponent of the formula, e.g. from -0.5 to -0.3 (see the comment marked in calculateProbabilityFast below). Is this a problem with my code?
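For reference, the density that calculateProbabilityFast below is meant to evaluate is the standard multivariate normal (the -1/2 in the exponent is the constant I experimented with):

\mathcal{N}(x \mid \mu, \Sigma) = \frac{1}{(2\pi)^{d/2}\,\sqrt{\det\Sigma}} \exp\!\left(-\tfrac{1}{2}\,(x-\mu)^\top \Sigma^{-1} (x-\mu)\right)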
class DataPoint {
int nDims; // Number of dimensions
float[] data;
DataPoint(int n) {nDims = n; data = new float[n];}
DataPoint(float[] d) {nDims = d.length; data = d;}
}
float sum(float[] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += d[i];} return ret;}
float[] sub(float[] f, float[] u) {float[] ret = new float[f.length]; for (int i = 0; i < f.length; ++i) {ret[i] = f[i] - u[i];} return ret;}
float distSq(float[] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += d[i]*d[i];} return ret;}
float distSq(float[][] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += distSq(d[i]);} return ret;}
float det(float[][] mat) {
if (mat.length == 2 && mat[0].length == 2) {
float det = (mat[0][0] * mat[1][1]) - (mat[0][1] * mat[1][0]);
return det;
}
throw new RuntimeException("Det has to be 2x2");
}
float[][] inverse(float[][] mat) {
if (mat.length == 2 && mat[0].length == 2) {
float det = mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0];
float[][] ret = {{mat[1][1]/det, -mat[0][1]/det}, {-mat[1][0]/det, mat[0][0]/det}};
return ret;
}
throw new RuntimeException("Inverse has to be 2x2");
}
class GMM {
int number;
int dims;
float[] weights;
float[][] means;
float[][][] covariances;
float[][][] invCov;
GMM(int gNo, int noDimensions) {
number = gNo;
dims = noDimensions;
weights = new float[gNo];
means = new float[gNo][noDimensions];
covariances = new float[gNo][noDimensions][noDimensions];
invCov = new float[gNo][noDimensions][noDimensions];
// Initialise to random values.
for (int i = 0; i < gNo; ++i) {
weights[i] = random(0, 1);
for (int j = 0; j < noDimensions; ++j) {
means[i][j] = random(-100,100);
covariances[i][j][j] = 100;
}
// Invert only once the whole diagonal is filled in; inverting inside the
// loop divides by a zero determinant on the first pass.
invCov[i] = inverse(covariances[i]);
}
normaliseWeights();
}
float[][] EStep(DataPoint[] data) {
// For each data point, return the probability of each Gaussian having generated it
// Arguments: n-dimensional data
float[][] ret = new float[number][data.length];
for (int Gauss = 0; Gauss < number; ++Gauss) {
for (int i = 0; i < data.length; ++i) {
ret[Gauss][i] = calculateProbabilityFast(data[i], Gauss);
}
}
return ret;
}
void MStep(DataPoint[] data, float[][] dataProbabilities) {
for (int Gauss = 0; Gauss < number; ++Gauss) {
means[Gauss] = new float[data[0].nDims]; // Reset the mean accumulator to zero before summing
float probSum = 0;
for (int i = 0; i < dataProbabilities[Gauss].length; ++i) {
probSum += dataProbabilities[Gauss][i];
for (int j = 0; j < means[Gauss].length; ++j) {
means[Gauss][j] += data[i].data[j] * dataProbabilities[Gauss][i];
}
}
for (int i = 0; i < means[Gauss].length; ++i) {
means[Gauss][i] /= probSum; // Normalise
}
// Means[Gauss] has been updated
// Now for covariance.... :x
covariances[Gauss] = new float[data[0].nDims][data[0].nDims];
for (int m = 0; m < data[0].nDims; ++m) {
for (int n = 0; n < data[0].nDims; ++n) {
for (int i = 0; i < dataProbabilities[Gauss].length; ++i) {
covariances[Gauss][m][n] += (data[i].data[m]-means[Gauss][m])*(data[i].data[n]-means[Gauss][n])*dataProbabilities[Gauss][i];
}
}
}
// Normalise the accumulated (full, symmetric) covariance matrix by the total responsibility.
for (int m = 0; m < data[0].nDims; ++m) {
for (int n = 0; n < data[0].nDims; ++n) {
covariances[Gauss][m][n] /= probSum;
}
}
// Update inverses
invCov[Gauss] = inverse(covariances[Gauss]);
weights[Gauss] = probSum;
}
normaliseWeights();
}
float calculateProbabilityFast(DataPoint x, int Gauss) {
float ret = pow(TWO_PI, dims/2.0)*sqrt(det(covariances[Gauss]));
float exponent = 0;
for (int i = 0; i < x.nDims; ++i) {
float temp = 0;
for (int j = 0; j < x.nDims; ++j) {
temp += (x.data[j] - means[Gauss][j])*invCov[Gauss][i][j];
}
exponent += temp*(x.data[i] - means[Gauss][i]);
}
exponent = exp(-0.5*exponent);
// ==================================================================
// If I change this line HERE to -0.3*exponent, everything works fine
// ==================================================================
//print(exponent); print(","); println(ret);
return exponent/ret;
}
void normaliseWeights() {
float sum = sum(weights);
for (int i = 0; i < number; ++i) {weights[i] /= sum;}
}
void display() {
ellipseMode(CENTER);
for (int i = 0; i < number; ++i) {
//strokeWeight(weights[i]*100);
strokeWeight(5);
stroke(color(255, 0, 0));
point(means[i][0], means[i][1]);
noFill();
strokeWeight(1.5);
ellipse(means[i][0], means[i][1], (covariances[i][0][0]), (covariances[i][1][1]));
ellipse(means[i][0], means[i][1], (covariances[i][0][0]*2), (covariances[i][1][1]*2));
fill(0);
}
}
}
DataPoint[] data;
final int size = 10000;
GMM MixModel;
void setup() {
// Hidden gaussians
size(800,600);
MixModel = new GMM(1, 2); // 1 gaussians, 2 dimensions.
data = new DataPoint[size];
int gNo = 1;
float gxMeans[] = new float[gNo];
float gxVars[] = new float[gNo];
float gyMeans[] = new float[gNo];
float gyVars[] = new float[gNo];
float covars[] = new float[gNo];
for (int i = 0; i < gNo; ++i) {
gxMeans[i] = random(-100, 100);
gxVars[i] = random(5, 40);
gyMeans[i] = random(-100, 100);
gyVars[i] = random(5, 40); // Actually std. devs!!
covars[i] = 0;//random(-1, 1);
println("Vars: " + str(pow(gxVars[i], 2)) + ", " + str(pow(gyVars[i], 2)));
println("Covar: " + str(covars[i]));
}
for (int i = 0; i < size; ++i) {
int gauss = (int)random(gNo);
data[i] = new DataPoint(2);
data[i].data[0] = randomGaussian()*gxVars[gauss] + gxMeans[gauss];
data[i].data[1] = (randomGaussian()*gyVars[gauss])*(1-abs(covars[gauss]))+(gyVars[gauss]*covars[gauss]*(data[i].data[0]-gxMeans[gauss])/gxVars[gauss]) + gyMeans[gauss];
}
frameRate(5); // Let's see what's happening!
}
void draw() {
translate(width/2, height/2); // set 0,0 at centre
background(color(255, 255, 255));
stroke(0);
strokeWeight(1);
for (int i = 0; i < size; ++i) {
point(data[i].data[0], data[i].data[1]);
}
MixModel.display();
float[][] dataProbs = MixModel.EStep(data);
MixModel.MStep(data, dataProbs);
print(MixModel.covariances[0][0][0]); print(", ");
println(MixModel.covariances[0][1][1]);
}
Edit: a complete, minimal working example. The variance still converges to 0, so does this suggest I'm doing something wrong in the algorithm itself?
import random, statistics, math
hiddenMu = random.uniform(-100, 100)
hiddenVar = random.uniform(10, 30)
dataLen = 10000
data = [random.gauss(hiddenMu, hiddenVar) for i in range(dataLen)]
hiddenVar **= 2 # Make it the actual variance rather than std. dev.
print("Variance: " + str(hiddenVar) + ", actual: " + str(statistics.variance(data)))
print("Mean : " + str(hiddenMu ) + ", actual: " + str(statistics.mean (data)))
guessMu = random.uniform(-100, 100)
guessVar = 100
print("Initial mu guess: " + str(guessMu))
print("Initial var guess: " + str(guessVar))
# perform iterations
numIters = 100
for i in range(numIters):
    # E-step: probability of each point under the current single-Gaussian guess
    dataProbs = [math.exp(-0.5*((x-guessMu)**2)/guessVar)/((2*math.pi*guessVar)**0.5) for x in data]
    # M-step: probability-weighted mean and variance
    guessMu = sum(map(lambda x: x[0]*x[1], zip(dataProbs, data)))/sum(dataProbs)
    guessVar = sum(map(lambda x: x[0]*((x[1]-guessMu)**2), zip(dataProbs, data)))/sum(dataProbs)
    print(str(i) + " mu guess: " + str(guessMu))
    print(str(i) + " var guess: " + str(guessVar))
    print()
Edit 2: Do I perhaps need Bessel's correction (multiplying the result by n/(n-1))? If so, how would I apply it when the sum of the probabilities themselves can be less than 1?
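For reference, the plain (unweighted) correction I mean is the usual unbiased variance estimate:

\hat{\sigma}^2 = \frac{n}{n-1}\cdot\frac{1}{n}\sum_{i=1}^{n}(x_i-\bar{x})^2 = \frac{1}{n-1}\sum_{i=1}^{n}(x_i-\bar{x})^2

but here the divisor is a sum of probabilities rather than a count n.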
Answer 0 (score: 0)
For anyone else who runs into the same problem: I now understand how points are assigned to Gaussians in a GMM. You may also see NaNs show up in your program, as in the question EM algorithm code is not working.
Instead of assigning each point its probability of belonging to a Gaussian using the formula listed above, you need to evaluate that probability for every one of the Gaussians and then normalise across them. This means that when only one Gaussian could plausibly have generated a point, the point becomes fully assigned to that Gaussian: its responsibility becomes 1, even if it originally had only a very small chance of coming from that distribution.
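Here is a minimal sketch of that normalised E-step, in the same style as the single-Gaussian Python example above (the two-component toy data, starting values and variable names are illustrative assumptions, not code from the question):

import random, math

# Toy data drawn from two hidden Gaussians (parameters chosen only for illustration).
data = [random.gauss(-50, 10) for _ in range(5000)] + [random.gauss(40, 20) for _ in range(5000)]

# Initial guesses for each component: [weight, mean, variance].
comps = [[0.5, random.uniform(-100, 100), 100.0],
         [0.5, random.uniform(-100, 100), 100.0]]

def density(x, mu, var):
    # Univariate normal pdf with variance var.
    return math.exp(-0.5 * (x - mu) ** 2 / var) / math.sqrt(2 * math.pi * var)

for it in range(50):
    # E-step: weighted densities, normalised across components so that
    # each point's responsibilities sum to 1.
    resp = []
    for x in data:
        p = [w * density(x, mu, var) for (w, mu, var) in comps]
        total = sum(p)
        resp.append([pk / total for pk in p])
    # M-step: responsibility-weighted means, variances and mixture weights.
    for k in range(len(comps)):
        rk = [r[k] for r in resp]
        nk = sum(rk)
        mu = sum(r * x for r, x in zip(rk, data)) / nk
        var = sum(r * (x - mu) ** 2 for r, x in zip(rk, data)) / nk
        comps[k] = [nk / len(data), mu, var]

print(comps)

With a single component the normalised responsibility of every point is exactly 1, so the M-step reduces to the ordinary sample mean and (biased) sample variance, and the collapse of the variance towards zero disappears.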