【實踐】數據挖掘DM課程課業打卡實驗2 樸素貝葉斯分類器
叮嘟!這裏是小啊嗚的學習課程資料整理。好記性不如爛筆頭,今天也是努力進步的一天。一起加油進階吧!
一、實驗目的
(1)理解樸素貝葉斯分類器的工作原理。
(2)編程實現樸素貝葉斯分類器。
二、實驗內容
1、(難度1)編程實現樸素貝葉斯分類器的訓練。假設數據只涉及連續屬性。
測試代碼如下:
% Bring the training and test sets into the workspace; the call below reads
% trAttr and trLabels, which are not defined anywhere else in this script.
load trainingData.mat
load testingData.mat
% Fit the naive Bayes model on the training data.
% NOTE(review): the function pasted after this script is declared as
% NaiveBayesTrain, not NaiveBayesTrain1 -- confirm which name the saved .m file uses.
[Params, prior, AllLabels] = NaiveBayesTrain1(trAttr, trLabels);
(將執行正確的代碼粘貼在此處,核心代碼要求有註釋)
function [Params,prior,AllLabels] = NaiveBayesTrain(trAttr, trLabels)
%NAIVEBAYESTRAIN Fit a naive Bayes model; every attribute is treated as continuous.
%   trAttr    - training matrix, one column per attribute (read as trAttr(:,k)).
%   trLabels  - class label of every training row.
%   Params    - numClass-by-numAttr cell; Params{c,k} holds the value returned
%               by GetParams4ContiAttr for class c and attribute k.
%   prior     - row vector; prior(c) is the fraction of labels equal to AllLabels(c).
%   AllLabels - unique(trLabels).
AllLabels = unique(trLabels);
numClass = length(AllLabels);       % number of distinct class labels
numAttr = size(trAttr,2);
Params{numClass,numAttr} = [];      % size the parameter cell up front
prior = zeros(1,numClass);
for c = 1:numClass
    % Row indices of the training samples that carry the c-th label.
    rowsOfClass = find(trLabels == AllLabels(c));
    prior(c) = length(rowsOfClass)/length(trLabels);
    % Estimate the per-attribute parameters for this class.
    for k = 1:numAttr
        Params{c,k} = GetParams4ContiAttr(trAttr(:,k), rowsOfClass);
    end
end
end
function Params1=GetParams4ContiAttr(AttrVals,ind)
%GETPARAMS4CONTIATTR Mean and variance of one attribute within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices selecting the rows that belong to the class.
%   Params1  - [mean, variance] of AttrVals(ind); var is called without a
%              weight argument, i.e. with its default normalisation.
classVals = AttrVals(ind);
mu = mean(classVals);
sigma2 = var(classVals);
Params1 = [mu, sigma2];
end
2、(難度2)編程實現樸素貝葉斯分類器的訓練和分類。
假設數據只涉及連續屬性。
測試代碼如下:
% Bring the training and test sets into the workspace; the statements below
% read trAttr, trLabels, tstAttr and tstLabels.
load trainingData.mat
load testingData.mat
% Train on the training set and predict a label for every test row.
Labels = bys_classify1(trAttr, trLabels, tstAttr);
% Accuracy: number of matching labels divided by the number of predictions.
acc = sum(Labels == tstLabels) / length(Labels);
(將執行正確的代碼粘貼在此處,核心代碼要求有註釋)
% load('trainingData.mat');
% load('testingData.mat');
% [Labels]=bys_classify1(trAttr, trLabels,tstAttr);
% acc=sum(Labels ==tstLabels)/length(Labels);
%bys_classify1實現樸素貝葉斯分類器的訓練和分類。數據只涉及連續屬性。
function [Labels]=bys_classify1(trAttr,trLabels,tstAttr)
%BYS_CLASSIFY1 Train a naive Bayes model and label every row of tstAttr.
%   trAttr   - training attribute matrix, passed straight to NaiveBayesTrain.
%   trLabels - training labels, passed straight to NaiveBayesTrain.
%   tstAttr  - test matrix; each row is classified independently.
%   Labels   - column vector with one predicted label per test row.
[Params,prior,AllLabels] = NaiveBayesTrain(trAttr,trLabels);
numTest = size(tstAttr,1);
Labels = zeros(numTest,1);
for row = 1:numTest
    % Score the sample against every class and keep the best-scoring label.
    post = NaiveBayesPredict(Params,prior,tstAttr(row,:));
    [~,best] = max(post);
    Labels(row) = AllLabels(best);
end
end
function post=NaiveBayesPredict(Params,prior,tstAttr)
%NAIVEBAYESPREDICT Unnormalised class posteriors of one sample (continuous attributes).
%   Params  - numClass-by-numAttr cell; Params{c,k} = [mean, variance] of
%             attribute k within class c.
%   prior   - vector of class priors; its length defines the number of classes.
%   tstAttr - attribute values of a single test sample.
%   post    - 1-by-numClass row vector; post(c) is prior(c) multiplied by the
%             Gaussian density of every attribute value under class c.
numClass = length(prior);
numAttr = length(tstAttr);
post = zeros(1,numClass);
for i = 1:numClass                  % colon range: visit every class
    p_AttrCond = 1;                 % running product of attribute likelihoods
    for AttrInd = 1:numAttr         % colon range: visit every attribute
        x = tstAttr(AttrInd);
        mu = Params{i,AttrInd}(1);  % named mu so the builtin mean is not shadowed
        sig = sqrt(Params{i,AttrInd}(2));
        % Normal density with mean mu and standard deviation sig, evaluated at x.
        p = (1/(sqrt(2*pi)*sig))*exp(-(x-mu)^2/(2*sig^2));
        p_AttrCond = p_AttrCond*p;
    end
    % Must stay inside the class loop so every class receives a score.
    post(i) = p_AttrCond*prior(i);
end
end
function [Params,prior,AllLabels]=NaiveBayesTrain(trAttr,trLabels)
%NAIVEBAYESTRAIN Training step: estimate class priors and per-attribute parameters.
%   trAttr    - training matrix, one column per attribute (read as trAttr(:,k)).
%   trLabels  - class label of every training row.
%   Params    - numClass-by-numAttr cell; Params{c,k} holds the value returned
%               by GetParams4ContiAttr for class c and attribute k.
%   prior     - row vector; prior(c) is the fraction of labels equal to AllLabels(c).
%   AllLabels - unique(trLabels).
AllLabels = unique(trLabels);
numClass = length(AllLabels);
numAttr = size(trAttr,2);
Params{numClass,numAttr} = [];      % size the parameter cell up front
prior = zeros(1,numClass);
for c = 1:numClass
    % Row indices of the training samples that carry the c-th label.
    rowsOfClass = find(trLabels == AllLabels(c));
    prior(c) = length(rowsOfClass)/length(trLabels);
    % Estimate the per-attribute parameters for this class.
    for k = 1:numAttr
        Params{c,k} = GetParams4ContiAttr(trAttr(:,k), rowsOfClass);
    end
end
end
function Params1=GetParams4ContiAttr(AttrVals,ind)
%GETPARAMS4CONTIATTR Mean and variance of one attribute within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices selecting the rows that belong to the class.
%   Params1  - [mean, variance] of AttrVals(ind); var is called without a
%              weight argument, i.e. with its default normalisation.
classVals = AttrVals(ind);
mu = mean(classVals);
sigma2 = var(classVals);
Params1 = [mu, sigma2];
end
3、(難度2)編程實現樸素貝葉斯分類器的訓練。
假設數據既涉及連續屬性,也涉及分類屬性。
測試代碼如下:
% Ten training records with three attributes each, written as column vectors.
refund = [1; 2; 2; 1; 2; 2; 1; 2; 2; 2];
mar = [1; 2; 1; 2; 3; 2; 3; 1; 2; 1];
inc = [125; 100; 70; 120; 95; 60; 220; 85; 75; 90];
% One row per record, one column per attribute.
trAttr = [refund, mar, inc];
trLabels = [1; 1; 1; 1; 2; 1; 1; 2; 1; 2];
% A single test record; it is defined here but not used by the call below.
tstAttr = [2, 2, 120];
% Train on the mixed categorical/continuous data.
[type, Params, prior, AllLabels] = NaiveBayesTrain2(trAttr, trLabels);
(將執行正確的代碼粘貼在此處,核心代碼要求有註釋)
% refund=[1 2 2 1 2 2 1 2 2 2]';
% mar=[1 2 1 2 3 2 3 1 2 1]';
% inc=[125,100,70,120,95,60,220,85,75,90]';
% trAttr =[refund,mar,inc];
% trLabels=[1 1 1 1 2 1 1 2 1 2]';
% tstAttr=[2,2,120];
% [type,Params,prior,AllLabels] = NaiveBayesTrain2(trAttr, trLabels);
function [type,Params,prior,AllLabels]=NaiveBayesTrain2(trAttr,trLabels)
%NAIVEBAYESTRAIN2 Train naive Bayes on mixed categorical and continuous attributes.
%   trAttr    - training matrix, one column per attribute.
%   trLabels  - class label of every training row.
%   type      - row vector; type(k) is 0 when attribute k is treated as
%               categorical and 1 when it is treated as continuous.
%   Params    - numClass-by-numAttr cell; Params{c,k} is the output of
%               GetParams4CategoricalAttr (type 0) or GetParams4ContiAttr (type 1).
%   prior     - row vector; prior(c) is the fraction of labels equal to AllLabels(c).
%   AllLabels - unique(trLabels).
%
%   An attribute is treated as categorical only when its values are exactly the
%   integer codes 1,2,...,k with k < 10; everything else is continuous.
AllLabels = unique(trLabels);
numClass = length(AllLabels);           % number of distinct class labels
numSamples = length(trLabels);
Ind4Class = cell(numClass,1);
prior = zeros(1,numClass);
for i = 1:numClass
    % Keep index vectors (not logical masks): GetParams4CategoricalAttr uses
    % length(ind) as the class size.
    Ind4Class{i,1} = find(trLabels == AllLabels(i));
    prior(i) = length(Ind4Class{i,1})/numSamples;
end
numAttr = size(trAttr,2);
Params = cell(numClass,numAttr);
type = zeros(1,numAttr);
for AttrInd = 1:numAttr
    AttrVals = trAttr(:,AttrInd);
    uniqueVals = unique(AttrVals);
    numAttrValsClass = length(uniqueVals);  % number of distinct attribute values
    % Integer-valued, fewer than 10 distinct values, smallest value 1 and
    % largest value k together mean the codes are exactly 1..k. The min check
    % rejects columns such as {0,2}, which cannot be used as 1-based indices
    % into the categorical probability table.
    isCategorical = ~isempty(uniqueVals) ...
        && all(rem(AttrVals,1) == 0) ...
        && numAttrValsClass < 10 ...
        && min(uniqueVals) == 1 ...
        && max(uniqueVals) == numAttrValsClass;
    if isCategorical
        type(AttrInd) = 0;
        for i = 1:numClass
            Params{i,AttrInd} = GetParams4CategoricalAttr(AttrVals,Ind4Class{i,1});
        end
    else
        type(AttrInd) = 1;
        for i = 1:numClass
            Params{i,AttrInd} = GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
        end
    end
end
end
function Params1=GetParams4ContiAttr(AttrVals,ind)
%GETPARAMS4CONTIATTR Mean and variance of one attribute within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices selecting the rows that belong to the class.
%   Params1  - [mean, variance] of AttrVals(ind); var is called without a
%              weight argument, i.e. with its default normalisation.
classVals = AttrVals(ind);
mu = mean(classVals);
sigma2 = var(classVals);
Params1 = [mu, sigma2];
end
function Params1=GetParams4CategoricalAttr(AttrVals,ind)
%GETPARAMS4CATEGORICALATTR Relative frequency of each attribute value within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices of the rows that belong to the class; length(ind) is
%              used as the class size.
%   Params1  - row vector; Params1(k) is the share of class rows whose value
%              equals the k-th entry of unique(AttrVals).
categories = unique(AttrVals);
classVals = AttrVals(ind);
classSize = length(ind);
Params1 = [];
for k = 1:length(categories)
    % Count the class rows that take the k-th distinct value.
    hits = nnz(classVals == categories(k));
    Params1(k) = hits/classSize;
end
end
4、(難度3)編程實現樸素貝葉斯分類器的訓練和分類。
(將執行正確的代碼粘貼在此處,核心代碼要求有註釋)
function [Labels]=bys_classify(trAttr,trLabels,tstAttr)
%BYS_CLASSIFY Train a naive Bayes model and label every row of tstAttr.
%   trAttr   - training attribute matrix, passed straight to NaiveBayesTrain2.
%   trLabels - training labels, passed straight to NaiveBayesTrain2.
%   tstAttr  - test matrix; each row is classified independently.
%   Labels   - column vector with one predicted label per test row.
[type,Params,prior,AllLabels] = NaiveBayesTrain2(trAttr,trLabels);
numTest = size(tstAttr,1);
Labels = zeros(numTest,1);
for row = 1:numTest
    % Score the sample against every class and keep the best-scoring label.
    post = NaiveBayesPredict(type,Params,prior,tstAttr(row,:));
    [~,best] = max(post);
    Labels(row) = AllLabels(best);
end
end
function post=NaiveBayesPredict(type,Params,prior,tstAttr)
%NAIVEBAYESPREDICT Unnormalised class posteriors of one sample (mixed attributes).
%   type    - type(k) == 0 marks attribute k as categorical; any other value
%             takes the continuous branch.
%   Params  - cell indexed as Params{c,k}: a probability table indexed by the
%             attribute value (categorical) or [mean, variance] (continuous).
%   prior   - vector of class priors; its length defines the number of classes.
%   tstAttr - attribute values of a single test sample.
%   post    - post(c) = prior(c) times the product of the attribute likelihoods.
numClass = length(prior);
numAttr = length(tstAttr);
post = [];
for c = 1:numClass
    likelihood = 1;                 % running product over the attributes
    for k = 1:numAttr
        x = tstAttr(k);
        theta = Params{c,k};
        if type(k) ~= 0
            % Continuous attribute: normal density at x.
            mu = theta(1);
            sigma = sqrt(theta(2));
            p = (1/(sqrt(2*pi)*sigma))*exp(-(x-mu)^2/(2*sigma^2));
        else
            % Categorical attribute: the value itself is the index into the
            % table, so the codes must be consecutive and start at 1.
            p = theta(x);
        end
        likelihood = likelihood*p;
    end
    post(c) = likelihood*prior(c);
end
end
function [type,Params,prior,AllLabels]=NaiveBayesTrain2(trAttr,trLabels)
%NAIVEBAYESTRAIN2 Train naive Bayes on mixed categorical and continuous attributes.
%   trAttr    - training matrix, one column per attribute.
%   trLabels  - class label of every training row.
%   type      - row vector; type(k) is 0 when attribute k is treated as
%               categorical and 1 when it is treated as continuous.
%   Params    - numClass-by-numAttr cell; Params{c,k} is the output of
%               GetParams4CategoricalAttr (type 0) or GetParams4ContiAttr (type 1).
%   prior     - row vector; prior(c) is the fraction of labels equal to AllLabels(c).
%   AllLabels - unique(trLabels).
%
%   An attribute is treated as categorical only when its values are exactly the
%   integer codes 1,2,...,k with k < 10; everything else is continuous.
AllLabels = unique(trLabels);
numClass = length(AllLabels);           % number of distinct class labels
numSamples = length(trLabels);
Ind4Class = cell(numClass,1);
prior = zeros(1,numClass);
for i = 1:numClass
    % Keep index vectors (not logical masks): GetParams4CategoricalAttr uses
    % length(ind) as the class size.
    Ind4Class{i,1} = find(trLabels == AllLabels(i));
    prior(i) = length(Ind4Class{i,1})/numSamples;
end
numAttr = size(trAttr,2);
Params = cell(numClass,numAttr);
type = zeros(1,numAttr);
for AttrInd = 1:numAttr
    AttrVals = trAttr(:,AttrInd);
    uniqueVals = unique(AttrVals);
    numAttrValsClass = length(uniqueVals);  % number of distinct attribute values
    % Integer-valued, fewer than 10 distinct values, smallest value 1 and
    % largest value k together mean the codes are exactly 1..k. The min check
    % rejects columns such as {0,2}, which cannot be used as 1-based indices
    % into the categorical probability table.
    isCategorical = ~isempty(uniqueVals) ...
        && all(rem(AttrVals,1) == 0) ...
        && numAttrValsClass < 10 ...
        && min(uniqueVals) == 1 ...
        && max(uniqueVals) == numAttrValsClass;
    if isCategorical
        type(AttrInd) = 0;
        for i = 1:numClass
            Params{i,AttrInd} = GetParams4CategoricalAttr(AttrVals,Ind4Class{i,1});
        end
    else
        type(AttrInd) = 1;
        for i = 1:numClass
            Params{i,AttrInd} = GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
        end
    end
end
end
function Params1=GetParams4ContiAttr(AttrVals,ind)
%GETPARAMS4CONTIATTR Mean and variance of one attribute within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices selecting the rows that belong to the class.
%   Params1  - [mean, variance] of AttrVals(ind); var is called without a
%              weight argument, i.e. with its default normalisation.
classVals = AttrVals(ind);
mu = mean(classVals);
sigma2 = var(classVals);
Params1 = [mu, sigma2];
end
function Params1=GetParams4CategoricalAttr(AttrVals,ind)
%GETPARAMS4CATEGORICALATTR Relative frequency of each attribute value within one class.
%   AttrVals - values of a single attribute for all training rows.
%   ind      - indices of the rows that belong to the class; length(ind) is
%              used as the class size.
%   Params1  - row vector; Params1(k) is the share of class rows whose value
%              equals the k-th entry of unique(AttrVals).
categories = unique(AttrVals);
classVals = AttrVals(ind);
classSize = length(ind);
Params1 = [];
for k = 1:length(categories)
    % Count the class rows that take the k-th distinct value.
    hits = nnz(classVals == categories(k));
    Params1(k) = hits/classSize;
end
end
Ending!
更多課程知識學習記錄隨後再來吧!
就醬,嘎啦!
注:
人生在勤,不索何獲。