4.2 最近鄰規則分類算法(KNN)應用

1 數據集介紹:

鳶尾花(Iris)數據集
這裏寫圖片描述

150個實例
這裏寫圖片描述
萼片長度,萼片寬度,花瓣長度,花瓣寬度
(sepal length, sepal width, petal length and petal width)

類別:
Iris setosa, Iris versicolor, Iris virginica.

  2. 利用Python的機器學習庫sklearn: SkLearnExample.py
from sklearn import neighbors
from sklearn import datasets

# Create a k-nearest-neighbours classifier; no arguments are passed, so the
# library defaults are used.
knn = neighbors.KNeighborsClassifier()
# Load the Iris dataset that ships with scikit-learn and dump it for inspection.
iris = datasets.load_iris()
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(iris)
# Fit the classifier on the feature matrix and the matching class targets.
knn.fit(iris.data, iris.target)
# predict() takes a list of samples, hence the nested list for one sample
# made of four feature values.
predictedLabel = knn.predict([[0.1, 0.2, 0.3, 0.4]])

print(predictedLabel)

3.手寫knn算法:

import csv 
import random
import math
import operator
from webbrowser import Opera


def loadDataSet(filename, split, trainingSet=None, testSet=None):
    """Read a CSV file and randomly split its rows into two lists.

    The first four columns of every row are converted to ``float``; any
    remaining columns are kept as the strings read from the file.

    Args:
        filename: Path of the CSV file to read.
        split: Probability threshold. A row goes to ``trainingSet`` when
            ``random.random() < split`` and to ``testSet`` otherwise.
        trainingSet: Optional list that receives the training rows in place.
            A fresh list is created when omitted.
        testSet: Optional list that receives the test rows in place.
            A fresh list is created when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)``. When lists were passed in, the
        same objects are returned, so callers relying on in-place filling
        keep working.

    Raises:
        ValueError: If one of the first four fields of a non-empty row is
            not a number.
        IndexError: If a non-empty row has fewer than four fields.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    import sys

    # Fresh lists per call: list literals as defaults would be shared between
    # calls and silently accumulate rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filename, 'rb')
    else:
        csvfile = open(filename, 'r', newline='')

    with csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (typically a trailing one) come back as empty rows.
            # Skipping them replaces the old "drop the last row" approach,
            # which lost a real sample when the file had no trailing blank.
            if not row:
                continue
            for y in range(4):
                row[y] = float(row[y])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)

    return trainingSet, testSet





def euclideanDistance(testInstance, trainInstance, lenght):
    """Return the Euclidean distance over the first ``lenght`` fields.

    Args:
        testInstance: Indexable sequence of numbers (extra fields allowed).
        trainInstance: Indexable sequence of numbers (extra fields allowed).
        lenght: How many leading fields take part in the distance.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squaredSum = 0
    for idx in range(lenght):
        delta = testInstance[idx] - trainInstance[idx]
        squaredSum = squaredSum + delta ** 2
    return math.sqrt(squaredSum)


def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` rows of ``trainingSet`` nearest to ``testInstance``.

    Distance is measured with ``euclideanDistance`` over every field of
    ``testInstance`` except the last one.

    Args:
        trainingSet: Sequence of rows to search.
        testInstance: Row to find neighbours for.
        k: Number of neighbours to return.

    Returns:
        A list of ``k`` training rows, nearest first.

    Raises:
        IndexError: If ``k`` is larger than the number of training rows.
    """
    # The last field is left out of the distance computation.
    featureCount = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, featureCount))
        for candidate in trainingSet
    ]
    # sorted() is stable, so rows at equal distance keep their original order.
    ranked = sorted(scored, key=lambda pair: pair[1])
    # Index explicitly so asking for too many neighbours still raises.
    return [ranked[pos][0] for pos in range(k)]


def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    The last field of each neighbour row is taken as its class label and
    counts as one vote.

    Args:
        neighbors: Non-empty sequence of rows whose last field is the label.

    Returns:
        The label with the most votes. On a tie the winner depends on dict
        iteration order (first label seen on Python 3.7+).

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]


def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Args:
        testSet: Sequence of rows whose last field is the true label.
        predictions: Predicted labels, aligned by index with ``testSet``.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    # A random split can leave the test set empty on small inputs.
    if not testSet:
        return 0.0
    correct = sum(
        1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
    )
    # float() keeps the division true division on Python 2 as well.
    return (correct / float(len(testSet))) * 100.0


def main(filename="irisdata.csv", split=0.67, k=3):
    """Run the hand-written KNN classifier end to end and print the results.

    Loads the CSV file, splits it randomly into training and test rows,
    predicts a label for every test row and prints each prediction followed
    by the overall accuracy.

    Args:
        filename: CSV file to load. Defaults to ``"irisdata.csv"``.
        split: Threshold passed to ``loadDataSet`` for the random split.
        k: Number of neighbours that vote on each prediction.
    """
    trainingSet = []
    testSet = []
    loadDataSet(filename, split, trainingSet, testSet)
    # One concatenated argument prints identically on Python 2 and Python 3;
    # repr() of a list matches what the old print statement showed.
    print("trainingSet: " + repr(trainingSet))
    print("testSet: " + repr(testSet))

    predictions = []
    for testInstance in testSet:
        nearest = getNeighbors(trainingSet, testInstance, k)
        result = getResponse(nearest)
        predictions.append(result)
        print('>predicted=' + repr(result) + ',actual=' + repr(testInstance[-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy:' + repr(accuracy) + '%')



# Run the demo only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()                
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章