機器學習5-自己的第一個分類器

python 代碼

# coding=utf-8
import random
from sklearn.datasets import load_iris

# Load the iris dataset, then pull out its samples (X) and their targets (y).
iris = load_iris()
X, y = iris.data, iris.target

# 評分公式
from sklearn.metrics import accuracy_score

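# A throwaway baseline classifier that just guesses.
# Its accuracy is roughly 0.33, because the target only takes 3 values.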
class MyClf:
    """Baseline classifier that ignores the features and guesses a label.

    Each prediction is drawn at random from the labels seen during fit().
    """

    def fit(self, train_data, train_target):
        """Remember the training samples and their labels."""
        self.train_data, self.train_target = train_data, train_target

    def predict(self, test_data):
        """Return one randomly chosen training label per row of test_data."""
        labels = self.train_target
        # One random draw per test row; the row contents are never inspected.
        return [random.choice(labels) for _ in test_data]

# 使用k-neighbors原理寫的分類器
# 此處爲了簡單,k取值1
from scipy.spatial import distance

def euc(a, b):
    """Return the Euclidean distance between the points a and b."""
    dist = distance.euclidean(a, b)
    return dist

class MyClf2:
    """Nearest-neighbour classifier (k-neighbours with k fixed at 1).

    A test row is given the label of the training row that lies closest
    to it according to euc().
    """

    def fit(self, train_data, train_target):
        """Remember the training samples and their labels."""
        self.train_data = train_data
        self.train_target = train_target

    def predict(self, test_data):
        """Return the label of the nearest training row for each test row."""
        return [self.closest(row) for row in test_data]

    def closest(self, row):
        """Return the label of the training row nearest to row.

        Ties are resolved in favour of the earliest training row, because
        a later row only replaces the current best when strictly closer.
        """
        best_idx = 0
        best_dis = euc(row, self.train_data[0])

        # range() rather than xrange() so this also runs on Python 3.
        for idx in range(1, len(self.train_data)):
            curr_dis = euc(row, self.train_data[idx])
            if curr_dis < best_dis:
                best_idx, best_dis = idx, curr_dis
        return self.train_target[best_idx]

# train_test_split is imported from sklearn.model_selection; the older
# sklearn.cross_validation module it used to live in is deprecated.
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

# Random baseline: scores around 0.33 because the target only takes 3 values.
clf = MyClf()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('myclf score: %s' % accuracy_score(y_test, predictions))

# Nearest-neighbour classifier.
clf = MyClf2()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print('myclf2 score: %s' % accuracy_score(y_test, predictions))

截圖

這裏寫圖片描述

我們自己寫的分類器的正確率到了97%

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章