EM 算法(Python 實現):模擬估計兩個方差相同、均值不同的分佈的均值。EM 算法的推導細節可參考教材。

 


#模擬兩個正態分佈的均值估計
 
from numpy import *
import numpy as np
import random
import copy


SIGMA = 6
EPS = 0.0001

#生成方差相同,均值不同的樣本
def generate_data():	
	Miu1 = 20
	Miu2 = 40
	N = 1000
	X = mat(zeros((N,1)))
	for i in range(N):
		temp = random.uniform(0,1)
		if(temp > 0.3):
			# 均值爲24.5,取值範圍爲23-26
			X[i] = temp + Miu1
		else:
			# 均值爲41.5,取值範圍爲41-43
			X[i] = temp + Miu2
	return X
 
# EM算法
def my_EM(X):
	# 該模型包含兩個單高斯
	k = 2
	# 數據量爲N
	N = len(X)
	# 隨機生成一個2x1的矩陣
	Miu = np.random.randn(2,1)
	# 注意要採用浮點數,給一個較合適的初始值較好
	Sigma = np.array([[15.],[2.]])
	# 每個分量的權值
	weight = np.array([[0.5],[0.5]])
	# 初始化1000個數據的後驗概率1000x2
	Posterior = mat(zeros((N,2)))

	dominator = 0
	numerator = 0
	# 先求後驗概率
	for iter in range(1000):
		for i in range(N):
			dominator = 0
			# estimate
			for j in range(k):
				# 求樣本在當前模型下的整體概率
				dominator = dominator + weight[j] / (Sigma[j]) * np.exp(-1.0/(2.0*(Sigma[j])**2) * (X[i] - Miu[j])**2)
			# 求樣本在某個高斯分量下的概率值,以及與整體概率的
			for j in range(k):
				numerator = weight[j] / (Sigma[j]) * np.exp(-1.0/(2.0*(Sigma[j])**2) * (X[i] - Miu[j])**2)
				Posterior[i,j] = numerator/dominator
		# 參數值放到舊的參數中			
		oldMiu = copy.deepcopy(Miu)
		# 得到後驗概率
		print(Posterior)
		#最大化	
		for j in range(k):
			numerator = 0
			dominator = 0
			for i in range(N):
				numerator = numerator + Posterior[i,j] * X[i]
				dominator = dominator + Posterior[i,j]
			weight[j] = 1 / N * dominator
			Miu[j] = numerator/dominator

			numerator = 0
			for i in range(N):
				numerator = numerator + Posterior[i,j] * (( X[i] - Miu[j]) ** 2)
			Sigma[j] = np.sqrt(numerator/dominator)



		print ((abs(Miu - oldMiu)).sum()) 
			#print '\n'
		if (abs(Miu - oldMiu)).sum() < EPS:
			print(Miu,Sigma,weight,iter)
			break
 

if __name__ == '__main__':
	X = generate_data()
	print(X)
	my_EM(X)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章