% 個人學習記錄,只進行了書上例題的結果驗證,沒有改寫成函數。
%%%%%% Naive Bayes classifier (worked example, flat script) %%%%%%%%%%%%%%%
% Inputs (defined below):
%   X : N-by-2 cell array of training features; column 1 holds numbers,
%       column 2 holds character labels.
%   Y : N-by-1 vector of training targets, each +1 or -1.
%   x : 1-by-2 cell array holding the features of the instance to classify.
% Output:
%   y : predicted class of x (+1 or -1), displayed at the end of the script.
clc
clear
X = {1,'S';1,'M';1,'M';1,'S';1,'S';2,'S';2,'M';2,'M';2,'L';2,'L';3,'L';3,'M';3,'M';3,'L';3,'L'};
Y = [-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1]';
x = {2,'S'};

Num = length(Y); % total number of training samples

% Prior probabilities of the positive and negative classes
PositiveIndex = find(Y == 1);
NegativeIndex = find(Y == -1);
PositiveNum = length(PositiveIndex); % number of positive training samples
NegativeNum = length(NegativeIndex); % number of negative training samples
format rat % show results as fractions (changes the session's display format)
PositiveP = PositiveNum/Num; % prior of the positive class
NegativeP = NegativeNum/Num; % prior of the negative class

% Conditional probabilities: frequency of each feature value within a class
PositiveX1 = X(PositiveIndex,1); % feature 1 of the positive samples
PositiveX2 = X(PositiveIndex,2); % feature 2 of the positive samples
NegativeX1 = X(NegativeIndex,1); % feature 1 of the negative samples
NegativeX2 = X(NegativeIndex,2); % feature 2 of the negative samples

% Per-value tables kept for inspection in the workspace.
% strcmp is used for the character feature so labels longer than one
% character do not break the comparison the way cell2mat(...)=='S' would.
P_PositiveX11 = nnz(cell2mat(PositiveX1)==1)/PositiveNum;
P_PositiveX12 = nnz(cell2mat(PositiveX1)==2)/PositiveNum;
P_PositiveX13 = nnz(cell2mat(PositiveX1)==3)/PositiveNum;
P_PositiveX21 = nnz(strcmp(PositiveX2,'S'))/PositiveNum;
P_PositiveX22 = nnz(strcmp(PositiveX2,'M'))/PositiveNum;
P_PositiveX23 = nnz(strcmp(PositiveX2,'L'))/PositiveNum;
P_NegativeX11 = nnz(cell2mat(NegativeX1)==1)/NegativeNum;
P_NegativeX12 = nnz(cell2mat(NegativeX1)==2)/NegativeNum;
P_NegativeX13 = nnz(cell2mat(NegativeX1)==3)/NegativeNum;
P_NegativeX21 = nnz(strcmp(NegativeX2,'S'))/NegativeNum;
P_NegativeX22 = nnz(strcmp(NegativeX2,'M'))/NegativeNum;
P_NegativeX23 = nnz(strcmp(NegativeX2,'L'))/NegativeNum;

% Unnormalized posterior of x for each class. Column 1 is the score and
% column 2 is the class label. The likelihoods are looked up from the actual
% contents of x instead of being hard-coded to one particular query, so
% editing x above changes the prediction accordingly.
P_Positive_x = [PositiveP,1];
P_Negative_x = [NegativeP,-1];
for j = 1:numel(x)
    % Logical column marking training rows whose feature j equals x{j}
    Match = cellfun(@(v) isequal(v,x{j}),X(:,j));
    P_Positive_x(1) = P_Positive_x(1)*nnz(Match(PositiveIndex))/PositiveNum;
    P_Negative_x(1) = P_Negative_x(1)*nnz(Match(NegativeIndex))/NegativeNum;
end

% Pick the class with the largest posterior. max returns the index of the
% first maximum, so a tie yields a single label (the positive class, row 1)
% rather than a vector of labels.
P_x = [P_Positive_x;P_Negative_x];
[~,BestRow] = max(P_x(:,1));
y = P_x(BestRow,2)