1. 數據集介紹:
鳶尾花(Iris)數據集
150個實例
萼片長度,萼片寬度,花瓣長度,花瓣寬度
(sepal length, sepal width, petal length and petal width)
類別:
Iris setosa, Iris versicolor, Iris virginica.
2. 利用Python的機器學習庫sklearn(SkLearnExample.py):
from sklearn import neighbors
from sklearn import datasets
# Fit a k-nearest-neighbours classifier (default settings) on the Iris
# dataset that ships with scikit-learn.
knn = neighbors.KNeighborsClassifier()
iris = datasets.load_iris()
# Dump the loaded dataset object for inspection.
print(iris)
knn.fit(iris.data, iris.target)
# Classify a single hand-written sample made of four feature values.
predictedLabel = knn.predict([[0.1, 0.2, 0.3, 0.4]])
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(predictedLabel)
3. 手寫KNN算法:
import csv
import random
import math
import operator
from webbrowser import Opera
def loadDataSet(filename, split, trainingSet=None, testSet=None):
    """Load a CSV file and randomly split its rows into two lists.

    The first four columns of every row are converted to ``float``; any
    remaining columns (for example a class label) are kept as strings.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()`` for each row;
            a row goes to the training set when the random draw is below it,
            otherwise to the test set.
        trainingSet: Optional list that receives the training rows in place.
            A new list is created when omitted.
        testSet: Optional list that receives the test rows in place.
            A new list is created when omitted.

    Returns:
        A ``(trainingSet, testSet)`` tuple holding the lists that were filled.

    Raises:
        ValueError: If one of the first four columns is not a valid number.
        IndexError: If a non-empty row has fewer than four columns.
    """
    # Create fresh lists per call: mutable default arguments are shared
    # between calls and would silently accumulate rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # Text mode so that csv.reader receives str lines on Python 3 as well.
    with open(filename, 'r') as csvfile:
        dataset = list(csv.reader(csvfile))

    for row in dataset:
        # csv.reader yields an empty list for a blank line (such as the one
        # at the end of some data files). Skipping those explicitly keeps the
        # last real row, which a fixed "all but the last row" loop would drop
        # whenever the file has no trailing blank line.
        if not row:
            continue
        for y in range(4):
            row[y] = float(row[y])
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)
    return trainingSet, testSet
def euclideanDistance(testInstance, trainInstance, lenght):
    """Return the Euclidean distance between two instances.

    Only the first ``lenght`` components of each instance are compared, so
    trailing fields beyond that count are ignored.

    Args:
        testInstance: Indexable sequence whose leading items are numeric.
        trainInstance: Indexable sequence whose leading items are numeric.
        lenght: Number of leading components to include. The spelling is
            kept as-is because it is part of the function's signature.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    return math.sqrt(sum(
        (testInstance[x] - trainInstance[x]) ** 2 for x in range(lenght)
    ))
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` rows of ``trainingSet`` closest to ``testInstance``.

    Distances are computed with ``euclideanDistance`` over every component of
    ``testInstance`` except its last one.

    Args:
        trainingSet: Sequence of rows laid out like ``testInstance``.
        testInstance: Row to find neighbours for.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` training rows ordered from nearest to
        farthest. Fewer rows are returned when the training set is smaller
        than ``k``; an empty list is returned when ``k`` is not positive.
    """
    # The final element is left out of the distance computation.
    lenght = len(testInstance) - 1
    distances = [
        (trainInstance, euclideanDistance(testInstance, trainInstance, lenght))
        for trainInstance in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Slicing (instead of indexing 0..k-1) avoids an IndexError when k is
    # larger than the training set; max() keeps a negative k from turning
    # into an "all but the last" slice.
    return [trainInstance for trainInstance, _ in distances[:max(k, 0)]]
def getResponse(neighbors):
    """Return the most frequent class label among ``neighbors``.

    The label of each neighbour is read from its last element. When several
    labels share the highest count, the one that comes first in the vote
    dictionary's iteration order wins (insertion order on Python 3.7+).

    Args:
        neighbors: Non-empty sequence of rows whose last element is a
            hashable class label.

    Returns:
        The label with the highest number of votes.

    Raises:
        ValueError: If ``neighbors`` is empty, since there is nothing to
            vote on.
    """
    if not neighbors:
        raise ValueError("getResponse() requires at least one neighbor")

    classVotes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        classVotes[response] = classVotes.get(response, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # max() returns the first entry holding the highest count, which is the
    # same entry a stable descending sort would place first.
    return max(classVotes.items(), key=operator.itemgetter(1))[0]
def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    The expected label of each row is its last element; it is compared with
    the prediction stored at the same position in ``predictions``.

    Args:
        testSet: Sequence of rows whose last element is the expected label.
        predictions: Sequence of predicted labels, one per row of
            ``testSet`` and in the same order.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` yields 0.0.
    """
    total = len(testSet)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(
        1 for index, row in enumerate(testSet)
        if row[-1] == predictions[index]
    )
    # float() keeps the division exact under Python 2 integer semantics.
    return (correct / float(total)) * 100.0
def main():
    """Run the hand-written k-NN classifier on ``irisdata.csv``.

    Loads the file with a 0.67 split threshold, prints both resulting sets,
    predicts a label for every test row from its 3 nearest training rows,
    prints each prediction next to the actual label, and finally prints the
    overall accuracy.
    """
    trainingSet = []
    testSet = []
    loadDataSet("irisdata.csv", 0.67, trainingSet, testSet)
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the "print a, b" statement form.
    print('trainingSet: ' + repr(trainingSet))
    print('testSet: ' + repr(testSet))

    predictions = []
    k = 3
    for testInstance in testSet:
        # Named "nearest" so the local does not shadow the sklearn
        # "neighbors" module imported elsewhere in this file.
        nearest = getNeighbors(trainingSet, testInstance, k)
        result = getResponse(nearest)
        predictions.append(result)
        print('>predicted=' + repr(result) + ',actual=' + repr(testInstance[-1]))

    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy:' + repr(accuracy) + '%')
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()