一、问题描述
二、算法核心思想分析
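贝叶斯决策论的核心思想是:由先验概率 $P(\omega_i)$ 和类条件概率密度 $p(x\mid\omega_i)$ 求出后验概率,再把样本判给后验概率最大的类别。这里简化为二分类问题,并假设各类的类条件概率密度服从正态分布,因此只需估计每一类的均值向量 $\mu_i$ 和协方差矩阵 $\Sigma_i$,连同特征维度 $d$ 和先验概率 $P(\omega_i)$ 代入判别函数 $g_i(x) = -\frac{1}{2}(x-\mu_i)^{T}\Sigma_i^{-1}(x-\mu_i) - \frac{d}{2}\ln 2\pi - \frac{1}{2}\ln|\Sigma_i| + \ln P(\omega_i)$,再比较 $g_1(x)$ 与 $g_2(x)$ 的大小即可。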
三、代码参考
我这里用 Python 实现,主要用到 xlrd 和 numpy 两个库。Excel 文件中总共 30 行数据,每列依次为:x1, x2, x3, w(w 为类别标签);代码中只使用 w 不等于 3 的样本。
import xlrd
import numpy as np
def read_data(path="lab1_data.xlsx"):
    """Read the labelled samples from the first sheet of an Excel workbook.

    The first row of the sheet is skipped, and every remaining row whose
    fourth column equals 3 is dropped.

    Args:
        path: Workbook to open. Defaults to ``"lab1_data.xlsx"``, the file
            name this function previously hard-coded.

    Returns:
        A list of rows; each row is the list of cell values of one sheet row
        (per the accompanying article: x1, x2, x3, w).

    NOTE(review): xlrd 2.0 and later only read legacy ``.xls`` workbooks, so
    opening an ``.xlsx`` file presumably requires xlrd < 2.0 -- confirm
    against the installed version.
    """
    table = xlrd.open_workbook(path).sheets()[0]
    # Row 0 is skipped; presumably it is a header line -- verify in the sheet.
    rows = (table.row_values(index) for index in range(1, table.nrows))
    return [row for row in rows if row[3] != 3]
def get_u(x):
    """Return the column-wise mean of the samples in *x*.

    Args:
        x: Samples arranged one row per sample, one column per feature.

    Returns:
        The result of ``np.mean(x, axis=0)``, i.e. one mean per column.
    """
    return np.mean(x, axis=0)
def get_sigmal(x):
    """Return the sample covariance of the feature columns of *x*.

    Args:
        x: Samples arranged one row per sample, one column per feature.

    Returns:
        The value of ``np.cov`` applied to the transposed samples, so each
        feature is treated as one variable. With a single feature column the
        result is a 0-d array holding the variance.
    """
    # np.mat was removed in NumPy 2.0. atleast_2d reproduces its "always 2-D"
    # behaviour, so np.cov receives the same (features x samples) input.
    samples = np.atleast_2d(np.asarray(x))
    return np.cov(samples.T)
def get_g(x, u, sigmal, pw, d):
    """Evaluate the Gaussian discriminant function g(x) for one class.

    Computes
    ``-0.5 * (x - u) * inv(sigmal) * (x - u)^T - d / 2 * ln(2 * pi)
    - 0.5 * ln(det(sigmal)) + ln(pw)``.

    Args:
        x: Feature vector of the sample (length ``d``).
        u: Mean vector of the class (length ``d``).
        sigmal: Covariance of the class, ``d x d`` (a scalar or 0-d array is
            accepted when ``d`` is 1).
        pw: Prior probability of the class.
        d: Number of features.

    Returns:
        A 1x1 ``np.matrix`` holding the discriminant value, the same type the
        previous ``np.mat``-based implementation returned.

    Raises:
        numpy.linalg.LinAlgError: If ``sigmal`` is singular.
    """
    # np.mat was removed in NumPy 2.0, so the algebra is done on plain
    # ndarrays that are forced to 2-D the way np.mat used to do.
    diff = np.atleast_2d(np.asarray(x, dtype=float) - np.asarray(u, dtype=float))
    cov = np.atleast_2d(np.asarray(sigmal, dtype=float))
    # Solving cov * y = diff^T avoids forming the explicit inverse.
    mahalanobis = diff @ np.linalg.solve(cov, diff.T)
    g = (
        -0.5 * mahalanobis
        - d / 2 * np.log(2 * np.pi)
        - 0.5 * np.log(np.linalg.det(cov))
        + np.log(pw)
    )
    return np.asmatrix(g)
def main(dimension):
    """Run the two-class Bayes classifier on the first *dimension* features.

    For the classes labelled 1 and 2 the mean and covariance are estimated
    from the samples returned by ``read_data``. Every sample is then
    classified by comparing the two discriminant values, using equal priors
    of 0.5. The same samples are used for estimation and evaluation. The
    per-sample results, the accuracy and the error rate are printed.

    Args:
        dimension: Number of leading feature columns to use from each row.
    """
    priors = (0.5, 0.5)
    samples = read_data()

    # Estimate the parameters of class 1 and class 2 (label is in column 3).
    means = []
    covariances = []
    for label in (1, 2):
        features = [row[:dimension] for row in samples if row[3] == label]
        means.append(get_u(features))
        covariances.append(get_sigmal(features))

    correct = 0
    wrong = 0
    for row in samples:
        x = row[:dimension]
        w = row[3]
        print("x =", x)
        g1 = get_g(x, means[0], covariances[0], priors[0], dimension)
        g2 = get_g(x, means[1], covariances[1], priors[1], dimension)
        print("g1(x) =", g1, "g2(x) =", g2)
        # A tie (g1 == g2) falls through to class 2.
        name, predicted = ("w1", 1) if g1 > g2 else ("w2", 2)
        flag = w == predicted
        print(name, flag)
        if flag:
            correct += 1
        else:
            wrong += 1

    total = len(samples)
    error_rate = wrong / total
    accuracy = correct / total
    print("Accuracy = ", accuracy)
    print("Error Rate = ", error_rate)
if __name__ == '__main__':
    # Run the experiment with one, two and then three features.
    for dimension in (1, 2, 3):
        main(dimension)
四、运行结果
1、单特征值
均值方差:
各样本点结果:
误差率30%
2、双特征值
均值方差:
各样本点结果:
误差率45%
3、三特征值
均值方差:
各样本点结果:
误差率15%
如有错误请指正