我正在尝试在python中实现EM算法,但我遇到了一些困难。我似乎得到了不正确的结果和很多错误等。我不太确定从哪里开始,并希望任何人都可以提供帮助。我将在下面发布我当前的代码。
我使用的值如下:
1 6 21 2 53 88 1 26 3 1 4 4 0 23 4 55 13 14 10 5 77 40 62 15 62 1 37 158 90 43 11 10 46 7 5 22 9 18 37 183 64 38 36 2 1 24 284 5 59 6 3 14 6 6 309 51 64 5 38 79 3 0 5 3 4 8 95 33 11 29 36 37 3 30 0 104 5 141 9 28 30 45 22 9 69 10 5 4 55 6 6 1 4 1 5 69 14 41 71 52 59 5 83 10 5 0 32 33 80 29 3 181 1 3 44 2 16 54 209
我的目标是将值分为3类。
import numpy as np
import pandas as pd
import math
import random
data = pd.read_excel('/Users/simongraham/Desktop/testfile.xlsx',delimiter=',')
data = np.asarray(data)
data = data[:,1]
n = len(data)
num_iters = 10
prob_a = np.ones([1,n])
prob_a = prob_a[0]
prob_b = np.ones([1,n])
prob_b = prob_b[0]
prob_c = np.ones([1,n])
prob_c = prob_c[0]
mu_a = random.randint(10,100)
mu_b = random.randint(10,100)
mu_c = random.randint(10,100)
sigma_a = mu_a*0.1
sigma_b = mu_b*0.1
sigma_c = mu_c*0.1
for iteration in range(num_iters):
for i in range(n):
p_a = sum(prob_a) / n
p_b = sum(prob_b) / n
p_c = sum(prob_c) / n
prob_a[i] = (1 / np.sqrt(2 * np.pi * sigma_a)) * np.exp(-((data[i] - mu_a)**2) / 2*sigma_a)
prob_b[i] = (1 / np.sqrt(2 * math.pi * sigma_b)) * np.exp(-((data[i] - mu_b)**2) / 2*sigma_b)
prob_c[i] = (1 / np.sqrt(2 * math.pi * sigma_c)) * np.exp(-((data[i] - mu_c)**2) / 2*sigma_c)
prob_a[i] = (prob_a[i] * p_a ) / ((prob_a[i]* p_a) + (prob_b[i] * p_b) + (prob_c[i] * p_c))
prob_b[i] = (prob_b[i] * p_b ) / ((prob_a[i]* p_a) + (prob_b[i] * p_b) + (prob_c[i] * p_c))
prob_c[i] = (prob_c[i] * p_c ) / ((prob_a[i]* p_a) + (prob_b[i] * p_b) + (prob_c[i] * p_c))
sum1 = 0
for j in range(n):
sum1 += data[j] * prob_a[j]
mu_a = sum1 / np.sum(prob_a)
sum2 = 0
for j in range(n):
sum2 += data[j] * prob_b[j]
mu_b = sum2 / np.sum(prob_b)
sum3 = 0
for j in range(n):
sum3 += data[j] * prob_c[j]
mu_c = sum3 / np.sum(prob_c)
sum4 = 0
for j in range(n):
sum4 += prob_a[j]*((data[j] - mu_a)**2)
sigma_a = sum4 / np.sum(prob_a)
sum5 = 0
for j in range(n):
sum5 += prob_b[j]*((data[j] - mu_b)**2)
sigma_b = sum5 / np.sum(prob_b)
sum6 = 0
for j in range(n):
sum6 += prob_c[j]*((data[j] - mu_c)**2)
sigma_c = sum6 / np.sum(prob_c)
谢谢,
西蒙