Python:HSAL

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

from active_learning.sampler import hierarchical_clustering_AL
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import precision_score

def select_batch(sampler, N, already_selected,
                 **kwargs):
    """Forward a batch-selection request to ``sampler``.

    ``N`` and ``already_selected`` are merged into the extra keyword
    arguments, and the combined set is passed to ``sampler.select_batch``.
    Whatever that call returns is handed back unchanged.
    """
    request = dict(kwargs, N=N, already_selected=already_selected)
    return sampler.select_batch(**request)

def HSAL(X,y,budget):
    """Evaluate hierarchical-cluster active learning with 10-fold cross-validation.

    For every fold a random seed set is drawn from the training split and
    extended, one point at a time, until it contains at least two distinct
    labels. The hierarchical cluster sampler then selects one point per query
    until ``budget`` labels have been spent in total. A logistic regression is
    fitted on the labelled points and scored on the held-out split.

    Args:
        X: Feature matrix; must support indexing with integer index arrays.
        y: Label array, indexable the same way as ``X``.
        budget: Total number of labels to spend per fold, seed points included.

    Returns:
        Four lists with one entry per fold: accuracy, Cohen's kappa,
        weighted-average precision, and the number of labels that had been
        obtained when every class of ``y`` was first observed (0 if that
        never happened within the budget).

    Raises:
        ValueError: If a training split contains only a single class, so no
            two-class seed set can be formed.
    """
    AccList = []
    KappaList = []
    WAP_list = []
    BudgetCut_list = []
    classNum = len(set(y))
    initialNum = 1  # size of the first random draw; never modified per fold
    SEED = 100
    model = LogisticRegression(random_state=SEED,multi_class='multinomial',solver="lbfgs", max_iter=200)
    KF = KFold(n_splits=10,shuffle=True)
    batch = 1  # the sampler is queried for one point at a time
    for train_idx,test_idx in KF.split(X):
        budgetCut = 0
        X_train = X[train_idx]
        y_train = y[train_idx]
        X_test = X[test_idx]
        y_test = y[test_idx]

        # Positions (within the training split) that are still unlabeled.
        pool = list(range(len(train_idx)))

        # Initial random seed set (np.random.choice returns an ndarray).
        initialPoints = np.random.choice(pool, initialNum, replace=False)
        selected_inds = list(initialPoints)
        for idx in selected_inds:
            pool.remove(idx)

        # The classifier needs at least two classes: keep drawing unseen
        # points until the seed set is no longer single-class. Each drawn
        # point leaves the pool so it cannot be picked twice.
        while len(set(y_train[selected_inds])) == 1:
            if not pool:
                raise ValueError(
                    "training split contains a single class; "
                    "cannot build a two-class seed set")
            extra = np.random.choice(pool, 1, replace=False)[0]
            selected_inds.append(extra)
            pool.remove(extra)

        # Labels in the order they were obtained, seed points first.
        label_obtain = [y_train[idx] for idx in selected_inds]

        if len(np.unique(label_obtain)) == classNum:
            budgetCut = len(label_obtain)

        sampler = hierarchical_clustering_AL.HierarchicalClusterAL(X_train,y_train,seed=SEED)
        # Remaining budget after the seed points of *this* fold.
        queryNum = budget - len(selected_inds)
        for _ in range(queryNum):
            selected_batch_inputs = {"labeled": dict(zip(selected_inds, y_train[selected_inds])), "y": y_train}
            new_selected = select_batch(sampler,batch,selected_inds,**selected_batch_inputs)
            label_obtain.extend(y_train[idx] for idx in new_selected)

            # Record the first moment at which all classes have been seen.
            if budgetCut == 0 and len(np.unique(label_obtain)) == classNum:
                budgetCut = len(label_obtain)

            selected_inds.extend(new_selected)

        # selected_inds now holds the seed points plus every queried point.
        model.fit(X_train[selected_inds],y_train[selected_inds])
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test,y_pred)
        kappa = cohen_kappa_score(y_test,y_pred)
        wap = precision_score(y_test,y_pred,average='weighted')
        print("精度===",acc)
        AccList.append(acc)
        KappaList.append(kappa)
        WAP_list.append(wap)
        BudgetCut_list.append(budgetCut)

    return AccList,KappaList,WAP_list,BudgetCut_list

if __name__ == '__main__':
    # Demo run on the iris data set: spend 30 labels per fold, then print
    # the mean and standard deviation of every per-fold metric HSAL returns.
    X, y = datasets.load_iris(return_X_y=True)
    budget = 30
    results = HSAL(X, y, budget)
    # (mean label, std label) pairs, in the order HSAL returns its lists.
    report_labels = (
        ("平均精度=", "精度標準差="),
        ("平均kappa=", "kappa標準差="),
        ("平均WAP=", "WAP標準差="),
        ("平均截斷預算", "截斷預算標準差"),
    )
    for (mean_label, std_label), values in zip(report_labels, results):
        print(mean_label, np.mean(values), std_label, np.std(values))

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章