# k-nearest neighbour (kNN) classification script
# Requires: kNN_fictious.csv in the working directory
# --- Setup ----
# NOTE(review): the original called rm(list = ls()) here, wiping the
# user's entire workspace as a side effect; removed per best practice
# (restart R for a clean session instead).
# Fictitious data set; header = FALSE so columns are named V1..V5.
# V1-V4 are numeric features, V5 is the class label (1 / -1) --
# assumed from how the code below uses it; confirm against the CSV.
knnProp <- read.csv("kNN_fictious.csv", header = FALSE)
# Number of query (test) points drawn per run
Prow <- 86
# Number of reference (training) points per run
Srow <- 258
# Number of random resampling repetitions in Sample()
forNum <- 30
#knn算法
knn <- function(P, S, K) {
  # Classify each row of P by majority vote among its K nearest
  # neighbours in S, using Euclidean distance on columns V1-V4.
  #
  # P: data frame of query points; V1-V4 features, V5 true label.
  # S: data frame of reference points; same column layout.
  # K: number of neighbours (use an odd K so no vote can tie;
  #    a tie falls through to the -1 branch, as in the original).
  #
  # Returns a matrix with one row per query point:
  #   cols 1..K      sorted distances to the K nearest neighbours
  #   cols K+1..2K   labels of those neighbours (same order)
  #   col  2K+1      predicted label: 1 or -1
  #   col  2K+2      true label taken from P$V5
  #   col  2K+3      "T" if prediction matches the true label, else "F"
  # NOTE: writing "T"/"F" into a numeric matrix coerces the whole
  # matrix to character -- intentional, matching the original output;
  # Accuracy() downstream compares the last column against "T".
  #
  # Fix vs original: row counts come from nrow(P)/nrow(S) instead of
  # the hidden globals Prow/Srow, and the inner distance loop is
  # vectorized over the rows of S.
  nP <- nrow(P)
  distanceArray <- array(0, dim = c(nP, 2 * K + 3))
  for (i in seq_len(nP)) {
    # Euclidean distance from query i to every reference point at once.
    d <- sqrt((P$V1[i] - S$V1)^2 +
              (P$V2[i] - S$V2)^2 +
              (P$V3[i] - S$V3)^2 +
              (P$V4[i] - S$V4)^2)
    # Indices of the K nearest neighbours, closest first.
    nearest <- order(d)[seq_len(K)]
    kLabels <- S$V5[nearest]
    distanceArray[i, seq_len(K)] <- d[nearest]
    distanceArray[i, K + seq_len(K)] <- kLabels
    # Majority vote between label 1 and label -1.
    numA <- sum(kLabels == 1)
    predicted <- if (numA > K - numA) 1 else -1
    distanceArray[i, 2 * K + 1] <- predicted
    distanceArray[i, 2 * K + 2] <- P$V5[i]
    distanceArray[i, 2 * K + 3] <- if (predicted == P$V5[i]) 'T' else 'F'
  }
  distanceArray
}
#求正確率
Accuracy <- function(distanceArray, K) {
  # Fraction of correctly classified rows in a knn() result matrix.
  #
  # distanceArray: matrix returned by knn(); column 2K+3 holds the
  #   per-row verdict, "T" (correct) or "F" (incorrect).
  # K: neighbour count used to build distanceArray (locates the column).
  #
  # Returns the proportion of "T" verdicts in [0, 1].
  #
  # Fix vs original: the row count comes from the matrix itself instead
  # of the hidden global Prow, and the scalar counting loop is replaced
  # by a vectorized comparison. Every row is either "T" or "F", so
  # TNum / (TNum + FNum) == mean(verdict == "T").
  verdict <- distanceArray[, 2 * K + 3]
  sum(verdict == 'T') / length(verdict)
}
#隨機抽取樣本循環30次,求30次正確率
Sample <- function(prop, K) {
  # Mean classification accuracy over forNum random train/test splits.
  #
  # prop: full data set (features V1-V4, label V5); each iteration is
  #   randomly partitioned into Prow query rows and Srow reference rows.
  # K: neighbour count passed through to knn().
  #
  # Returns the average accuracy across the forNum repetitions.
  #
  # Fix vs original: the original ignored the `prop` parameter and read
  # the global knnProp directly, and hard-coded 344/86/87:344 instead of
  # reusing the Prow/Srow constants. The accuracy vector is now
  # preallocated instead of grown with c() inside the loop.
  # (Prow/Srow/forNum remain file-level constants, shared with knn.)
  accuracy <- numeric(forNum)
  for (i in seq_len(forNum)) {
    idx <- sample(seq_len(Prow + Srow))
    da <- knn(prop[idx[seq_len(Prow)], ],
              prop[idx[(Prow + 1):(Prow + Srow)], ],
              K)
    accuracy[i] <- Accuracy(da, K)
  }
  mean(accuracy)
}
# Run the experiment for several neighbourhood sizes and report the
# mean accuracy of each.
rst1 <- Sample(knnProp, 3)
rst2 <- Sample(knnProp, 7)
rst3 <- Sample(knnProp, 11)
print(rst1)
print(rst2)
print(rst3)